1 Introduction

This WebBook contains the code written to analyse data from the study of the functional profile of metagenome-assembled genomes and temporal trends of derived microbial communities from intensively reared broiler chickens.

1.1 Prepare the R environment

1.1.1 Environment

To reproduce all the analyses locally, clone this repository in your computer using:

RStudio > New Project > Version Control > Git

And indicating the following git repository:

https://github.com/alberdilab/chicken_genome_reduced_bacteria.git

Once the R project has been created, follow the instructions and code chunks shown in this webbook.

1.1.2 Libraries

The following R packages are required for the data analysis.

# Base
library(R.utils)
library(knitr)
library(tidyverse)
library(devtools)
library(rmarkdown)
library(janitor)

# For tree handling
library(ape)
library(phyloseq)
library(phytools)
library(tidytree)

# For plotting
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(ggnewscale)
library(gridExtra)
library(ggtreeExtra)
library(ggtree)
library(ggh4x)
library(sjPlot)
library(colorspace)
library(vioplot)

# For statistics
library(spaa)
library(vegan)
library(Rtsne)
library(microbiome)
library(geiger)
library(hilldiv2)
library(distillR)
library(broom.mixed)
library(Hmsc)
library(MuMIn)
library(corrplot)
library(lme4)
library(nlme)
library(boot)

2 Metagenomic data preparation

2.1 Load data

Load the original data files outputted by the bioinformatic pipeline.

2.1.1 Chicken sample metadata

chicken_metadata <- 
  read_tsv("data/metadata.tsv") %>%
  mutate(sampling_time = factor(sampling_time,
                                levels = c('7', '21', '35')))

2.1.2 Taxonomy of metagenome-assembled genomes

genome_taxonomy <- 
  read_tsv("data/taxonomy.tsv") %>%
  rename(genome = 1) %>%
  mutate(phylum = case_when(
        phylum == "Actinobacteriota" ~ "Actinomycetota",
        phylum == "Firmicutes" ~ "Bacillota",
        phylum == "Firmicutes_A" ~ "Bacillota_A",
        phylum == "Firmicutes_B" ~ "Bacillota_B",
        phylum == "Firmicutes_C" ~ "Bacillota_C",
        phylum == "Cyanobacteria" ~ "Cyanobacteriota",
        phylum == "Proteobacteria" ~ "Pseudomonadota",
        TRUE ~ phylum)) %>% 
  filter(!domain == "Archaea")

2.1.3 Genome statistics

genome_stats <- 
  read_tsv("data/stats.tsv") %>%
  rename(genome = 1) %>%
  mutate(correction_factor = median(mag_length) / mag_length) %>% 
  filter(genome %in% genome_taxonomy$genome) %>% 
  arrange(match(genome, genome_taxonomy$genome))

2.1.4 Phylogenetic tree

genome_tree <- read_tree("data/tree.nwk")

2.1.5 Raw gene annotations

if(!file.exists("data/gene_annotations.tsv.xz")){
  download.file(
    url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/gene_annotations.tsv.xz',
    destfile = 'data/gene_annotations.tsv.xz', method = 'curl'
    )
}

genome_annotations <- read_tsv("data/gene_annotations.tsv.xz")

2.1.6 DRAM functional annotations

genome_kegg_paths <-
  read_tsv("data/dram.tsv") %>%
  rename(genome = 1) %>% 
  filter(genome %in% genome_taxonomy$genome) %>% 
  select(1:400)

2.1.7 Metagenomic read counts

read_counts <- 
  read_tsv("data/mag_counts.tsv") %>% 
  rename(genome = 1) %>% 
  filter(genome %in% genome_taxonomy$genome) 

2.2 Distilling Genome Inferred Functional Traits (GIFTs)

2.2.1 Distilling DRAM annotations

gifts_raw <- distill(genome_annotations, GIFT_db2,
                     genomecol = 2, annotcol = c(9,10,19), verbosity = F)

# Load ENA to genome_id table
ena_to_mag_id <- read_tsv("data/ena_to_mag_id.tsv")

gifts_matrix <-
  gifts_raw %>%
  data.frame() %>%
  rownames_to_column(var = 'mag_name') %>%
  left_join(ena_to_mag_id %>%
              select(mag_name, mag_id),
            by = 'mag_name') %>%
  select(-mag_name) %>%
  relocate(mag_id) %>%
  filter(mag_id %in% genome_taxonomy$genome) %>% 
  column_to_rownames('mag_id') %>%
  as.matrix()

2.2.2 Perform completeness correction for GIFTs

# Aggregate into compound level
gifts_elements <- to.elements(gifts_matrix, GIFT_db2)

# Aggregate into function level
gifts_functions <- to.functions(gifts_elements, GIFT_db2)

# Aggregate into overall domain level
gifts_domains <- to.domains(gifts_functions, GIFT_db2)

2.3 Create working objects

Transform the original data files into working objects for downstream analyses.

2.3.1 Transform reads into genome counts

mag_weighted <-
  read_counts %>% 
  column_to_rownames(var = 'genome') %>% 
  mutate(across(where(is.numeric), ~ ./ genome_stats$correction_factor)) %>% 
  t() %>% 
  as.data.frame()

2.3.2 Calculate relative abundances

# Relative abundances
total <-
  decostand(mag_weighted, 'total') %>%
  t() %>%
  as.data.frame()  

# Square root of relative abundances
hel <-
  decostand(mag_weighted, 'hellinger') %>%
  t() %>%
  as.data.frame() 

# Transpose matrix
mag_weighted <-
  mag_weighted %>% 
  t() %>% 
  as.data.frame()

2.3.3 Prepare a phyloseq object

phylo_samples <- 
  chicken_metadata %>% 
  mutate(sampling_time = as.factor(sampling_time),
         trial_age = paste(trial, sampling_time, sep = "_"),
         trial_age = as.factor(trial_age)) %>% 
  column_to_rownames("animal_code") %>% 
  sample_data() # Convert to phyloseq sample_data object

phylo_genome <- otu_table(total, taxa_are_rows = TRUE) 

phylo_taxonomy <- 
  genome_taxonomy %>%
  column_to_rownames("genome") %>% 
  as.matrix() %>% 
  tax_table() # Convert to phyloseq tax_table object

phylo_tree <- phy_tree(genome_tree) 

phylo_seq <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples, phylo_tree)

2.4 Prepare color scheme

# As dataframe
taxonomy_colors <- read_tsv("data/taxonomy_colours.tsv") 

# As vector
phylum_colors <- 
  c(Halobacteriota = "#f2f2f2",
    Methanobacteriota = "#bfbfbf",
    Patescibacteria = "#dacce3",
    Campylobacterota = "#cdadca",
    Synergistota = "#c08eb3",
    Thermoplasmatota = "#777dae",
    Verrucomicrobiota = "#0066ae",
    Desulfobacterota = "#68b0dc",
    Pseudomonadota = "#60cfde",
    Bacteroidota = "#4dc87c",
    Cyanobacteriota = "#92e09f",
    Actinomycetota = "#c9e0af",
    Deferribacterota = "#dff77e",
    Bacillota = "#ffd366",
    Bacillota_A = "#fd8854",
    Bacillota_B = "#8b1222",
    Bacillota_C = "#5a0c37")

order_colors <- 
  c(Methanomicrobiales = "#f2f2f2",
    Methanobacteriales = "#bfbfbf",
    Saccharimonadales = "#dacce3",
    Campylobacterales = "#cdadca",
    Synergistales = "#c08eb3",
    Methanomassiliicoccales = "#777dae",
    Victivallales = "#0066ae",
    Opitutales = "#1c7ebc",
    Verrucomicrobiales = "#4a96cc",
    Desulfovibrionales = "#68b0dc",
    Enterobacterales = "#60cfde",
    RF32 = "#60dfd2",
    Burkholderiales = "#5cdfb5",
    'Rs-D84' = "#40df91",
    Bacteroidales = "#4dc87c",
    Flavobacteriales = "#88c88b",
    Gastranaerophilales = "#92e09f",
    Coriobacteriales = "#c9e0af",
    Actinomycetales = "#d8e093",
    Deferribacterales = "#dff77e",
    RF39 = "#ecf76d",
    Lactobacillales = "#feef68",
    'ML615J-28' = "#fde671",
    Erysipelotrichales = "#ffd366",
    Acholeplasmatales = "#fdc151",
    Bacillales = "#fc953d",
    RFN20 = "#fd8035",
    Oscillospirales = "#fd8854",
    TANB77 = "#f4814d",
    Christensenellales = "#c26340",
    Lachnospirales = "#c28c5c",
    Clostridiales = "#ce9360",
    Monoglobales_A = "#ce734f",
    Peptostreptococcales = "#c65631",
    Monoglobales = "#b93725",
    UBA1212 = "#b10f19",
    UBA4068 = "#8b1222",
    Selenomonadales = "#7a1a12",
    Acidaminococcales = "#64121f",
    Veillonellales = "#5a0c37")
sampling_day_colors <- c('#E69F00', '#CC6677', '#56B4E9')

2.5 Wrap working objects

All working objects are wrapped into a single Rdata object to facilitate downstream usage.

save(
  # Chicken data
  chicken_metadata,
  
  # Bacterial genome data
  genome_taxonomy, 
  genome_stats, 
  genome_tree, 
  genome_kegg_paths,
  read_counts,
  
  # Transformed data
  mag_weighted,
  total,
  hel, 
  
  # Phyloseq objects
  phylo_seq,

  # Functions
  gifts_elements,
  gifts_functions,
  gifts_domains,
  
  # Colors
  taxonomy_colors,
  phylum_colors,
  order_colors,
  sampling_day_colors,
  
  # Define file path
  file = "data/data_mg.Rdata")
rm(list = ls())

3 Metatranscriptomic data preparation

3.1 Load data

3.1.1 Metatranscriptomic gene expression counts

if(!file.exists("data/gene_expressions.tsv.xz")){
  download.file(
    url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/gene_expressions.tsv.xz',
    destfile = 'data/gene_expressions.tsv.xz', method = 'curl'
    )
}

gene_expr <- 
  read_tsv("data/gene_expressions.tsv.xz") %>% 
  select(1:129) %>%
  rename(mag_name = 'MAG') %>%
  relocate(mag_name)

# Chunk analysis to sets of 100 MAGs
mags <-  sort(unique(gene_expr$mag_name))

gene_expr_1 <- gene_expr[gene_expr$mag_name %in% mags[c(1:100)],c(2:129)]
gene_expr_2 <- gene_expr[gene_expr$mag_name %in% mags[c(101:200)],c(2:129)]
gene_expr_3 <- gene_expr[gene_expr$mag_name %in% mags[c(201:300)],c(2:129)]
gene_expr_4 <- gene_expr[gene_expr$mag_name %in% mags[c(301:400)],c(2:129)]
gene_expr_5 <- gene_expr[gene_expr$mag_name %in% mags[c(401:500)],c(2:129)]
gene_expr_6 <- gene_expr[gene_expr$mag_name %in% mags[c(501:600)],c(2:129)]
gene_expr_7 <- gene_expr[gene_expr$mag_name %in% mags[c(601:700)],c(2:129)]
gene_expr_8 <- gene_expr[gene_expr$mag_name %in% mags[c(701:825)],c(2:129)]


save(gene_expr_1, file = "data/gene_expr_1.Rdata")
save(gene_expr_2, file = "data/gene_expr_2.Rdata")
save(gene_expr_3, file = "data/gene_expr_3.Rdata")
save(gene_expr_4, file = "data/gene_expr_4.Rdata")
save(gene_expr_5, file = "data/gene_expr_5.Rdata")
save(gene_expr_6, file = "data/gene_expr_6.Rdata")
save(gene_expr_7, file = "data/gene_expr_7.Rdata")
save(gene_expr_8, file = "data/gene_expr_8.Rdata")

rm(list = ls())

3.1.2 Distillation

mag_ann <-
  read_tsv("data/gene_annotations.tsv.xz") %>%
  mutate(gene_length = end_position - start_position)

# Chunk analysis
load("data/gene_expr_1.Rdata")
distq_1 <- distillq(gene_expr_1, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_1, file = "data/distilled_caecum_1.Rdata")
rm(gene_expr_1, distilled_caecum_1)

load("data/gene_expr_2.Rdata")
distq_2 <- distillq(gene_expr_2, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_2, file = "data/distilled_caecum_2.Rdata")
rm(gene_expr_2, distilled_caecum_2)

load("data/gene_expr_3.Rdata")
distq_3 <- distillq(gene_expr_3, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_3, file = "results/tables/distilled_caecum_3.Rdata")
rm(gene_expr_3, distilled_caecum_3)

load("data/gene_expr_4.Rdata")
distq_4 <- distillq(gene_expr_4, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_4, file = "results/tables/distilled_caecum_4.Rdata")
rm(gene_expr_4, distilled_caecum_4)

load("data/gene_expr_5.Rdata")
distq_5 <- distillq(gene_expr_5, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_5, file = "results/tables/distilled_caecum_5.Rdata")
rm(gene_expr_5, distilled_caecum_5)

load("data/gene_expr_6.Rdata")
distq_6 <- distillq(gene_expr_6, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 19))
save(distilled_caecum_6, file = "results/tables/distilled_caecum_6.Rdata")
rm(gene_expr_6, distilled_caecum_6)

load("data/gene_expr_7.Rdata")
distq_7 <- distillq(gene_expr_7, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 12))
save(distilled_caecum_7, file = "results/tables/distilled_caecum_7.Rdata")
rm(gene_expr_7, distilled_caecum_7)

load("results/tables/gene_expr_8.Rdata")
distq_8 <- distillq(gene_expr_8, mag_ann, GIFT_db2,
                    genecol = 1, genomecol = 2, annotcol = c(9, 10, 12))
save(distilled_caecum_8, file = "results/tables/distilled_caecum_8.Rdata")
rm(gene_expr_8, distilled_caecum_8)

3.1.3 Combine metatranscriptomic GIFT expression counts

load("data/distilled_caecum_1.Rdata")
load("data/distilled_caecum_2.RData")
load("data/distilled_caecum_3.RData")
load("data/distilled_caecum_4.RData")
load("data/distilled_caecum_5.RData")
load("data/distilled_caecum_6.RData")
load("data/distilled_caecum_7.RData")
load("data/distilled_caecum_8.RData")

expr_counts_raw <- c(distilled_expression_caecum_1, distilled_expression_caecum_2,
                     distilled_expression_caecum_3, distilled_expression_caecum_4,
                     distilled_expression_caecum_5, distilled_expression_caecum_6,
                     distilled_expression_caecum_7, distilled_expression_caecum_8)

rm(distilled_expression_caecum_1, distilled_expression_caecum_2,
    distilled_expression_caecum_3, distilled_expression_caecum_4,
    distilled_expression_caecum_5, distilled_expression_caecum_6,
    distilled_expression_caecum_7, distilled_expression_caecum_8)

# Change MAG IDs for a standardised code 
ena_to_mag_id <- read_tsv("data/ena_to_mag_id.tsv")

mag_name <- names(expr_counts_raw)

names(expr_counts_raw) <- ena_to_mag_id$mag_id[match(mag_name, ena_to_mag_id$mag_name)]

# Correct animal codes
expr_counts <- lapply(expr_counts_raw, function(x) {
  rownames(x) <- gsub("F1a", "", rownames(x))
  x
})

3.1.4 Metadata of chickens sequenced for MT

animal_codes_mt <- rownames(expr_counts$cmag_001) %>% 
  gsub("F1a", "", .)

chicken_metadata_mt <- 
  read_tsv("data/metadata.tsv") %>%
  mutate(sampling_time = factor(sampling_time, levels = c('7',
                                                          '21',
                                                          '35'))) %>% 
  filter(animal_code %in% animal_codes_mt)

3.2 Save working objects

save(expr_counts, chicken_metadata_mt, file = "data/data_mt.Rdata")
rm(list = ls())

4 MAG catalogue richness

4.1 Load MG data

load("data/data_mg.Rdata")

4.2 Densities of overall relative abundances by phylum

4.2.1 Calculate densities

sum_ind <-
  total %>%
  rownames_to_column(var = 'genome') %>%
  left_join(genome_taxonomy %>% select(genome, phylum), by = 'genome') %>%
  mutate(phylum = factor(phylum, levels = unique(names(phylum_colors)))) %>%
  pivot_longer(-c(genome, phylum), values_to = 'value', names_to = 'animal_code') %>%
  group_by(phylum, animal_code) %>%
  summarise(total_ind = sum(value), mean_ind = mean(value))

sum_phylum <-
  sum_ind %>%
  group_by(phylum) %>%
  summarise(total_phylum = sum(total_ind), mean_phylum = mean(mean_ind))

4.2.2 Filter rare phylums

count_taxa <-
  sum_ind %>%
  filter(phylum %in% c('Bacillota_A',
                       'Bacillota',
                       'Bacteroidota',
                       'Cyanobacteriota',
                       'Pseudomonadota',
                       'Actinomycetota',
                       'Verrucomicrobiota',
                       'Campylobacterota'))

4.2.3 Plot curves

The resulting plot corresponds to Figure 1a.

ggplot(count_taxa, aes(x = total_ind, colour = phylum, fill = phylum)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = phylum_colors) +
  scale_color_manual(values = phylum_colors) +
  scale_x_log10() +
  xlab("Relative abundance (log-transformed)") +
  ylab("Density") +
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 8),
    axis.text.x = element_text(size = 8),
    legend.position = "bottom") 

4.3 Functional ordination - PCoA with PCA loadings

gift_pcoa <- 
  gifts_elements %>% 
  as.data.frame() %>%
  vegdist(method="euclidean") %>%
  pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]

# Get genome positions
gift_pcoa_vectors <- 
  gift_pcoa$vectors %>%  # extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2)  # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- 
  cov(gifts_elements, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(gifts_elements)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1 = 1, Axis.2 = 2) %>% 
  rownames_to_column(var = "label") %>% 
  mutate(func = substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1 = mean(Axis.1),
            Axis.2 = mean(Axis.2)) %>% 
  rename(label = func) %>% 
  filter(!label %in% c("S01","S02","S03"))

The resulting plot corresponds to Figure 1b.

scale <- 20 # scale for vector loadings

gift_pcoa_vectors %>% 
  rownames_to_column(var = "genome") %>% 
  left_join(genome_stats, by = "genome") %>%
  left_join(genome_taxonomy, by = "genome") %>%
  ggplot() +
      # genome positions
      scale_color_manual(values = order_colors)+
      geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length), 
                 alpha = 0.9, shape = 16) +
      # scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.05, 4)) +
      # loading positions
      geom_segment(data = gift_pcoa_gifts, 
                   aes(x = 0, y = 0, xend = Axis.1 * scale, yend = Axis.2 * scale),
                    arrow = arrow(length = unit(0.2, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.2, 
                    color = "black") +
     # primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                        sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"), 
                        sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")) +
     geom_label_repel(data = gift_pcoa_gifts,
                      aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                      segment.color = 'transparent',
                      size = 3) +
    theme_minimal() + 
    theme(legend.position = "bottom",
          axis.title.x = element_text(size = 12),
          axis.title.y = element_text(size = 12),
          axis.text.x = element_text(size = 10),
          axis.text.y = element_text(size = 10),
          text = element_text(size = 8))

4.3.1 Plot genome length and average MCI gradients

# Preparing data for ploting gradients
overall_mci <-
  gifts_elements %>%
  as.data.frame() %>% 
  rownames_to_column(var = "genome") %>%
  rowwise() %>%
  mutate(overall = mean(c_across(B0101:S0301))) %>%
  select(genome, overall)

gift_pcoa_vectors_with_metadata <- 
  gift_pcoa_vectors %>% 
  rownames_to_column(var = "genome") %>% 
  left_join(overall_mci, by = "genome") %>%
  left_join(genome_stats %>%
              select(genome, mag_length), by = "genome") %>% 
  left_join(gifts_functions %>% 
              as.data.frame %>% 
              rownames_to_column(var = "genome") %>%
              select(genome, B04, B07))
  
scale_factor <- max(gift_pcoa_vectors_with_metadata$mag_length, na.rm = TRUE) /
                max(gift_pcoa_vectors_with_metadata$overall, na.rm = TRUE)


# Axis 1 gradients
gift_pcoa_vectors_with_metadata %>% 
  ggplot(aes(x = Axis.1)) +
    geom_smooth(aes(y = mag_length), color = "darkblue") +
    geom_smooth(aes(y = overall * scale_factor), color = "darkgreen") +
    scale_y_continuous(name = "MAG length",
                       sec.axis = sec_axis(~ . / scale_factor, name = "Overall MCI")) +
  theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "darkblue"),
      axis.title.y.right = element_text(color = "darkgreen"),
      axis.title.x = element_text(size = 12),
      axis.title.y = element_text(size = 12),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      text = element_text(size = 8),
      legend.position = "none")

4.3.2 Plot SCFA and vitamin biosynthesis gradients

# Axis 2 gradients
scale_factor <- max(gift_pcoa_vectors_with_metadata$B04, na.rm = TRUE) /
                max(gift_pcoa_vectors_with_metadata$B07, na.rm = TRUE)

gift_pcoa_vectors_with_metadata %>% 
  ggplot(aes(x = Axis.2)) +
    geom_smooth(aes(y = B04), color = "#ee3237") +
    geom_smooth(aes(y = B07 * scale_factor), color = "#a61f5e") +
    scale_y_continuous(name = "SCFA biosynthesis",
                       sec.axis = sec_axis(~ . / scale_factor, name = "Vitamin biosynthesis")) +
  theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "#ee3237"),
      axis.title.y.right = element_text(color = "#a61f5e"),
      axis.title.x = element_text(size = 12),
      axis.title.y = element_text(size = 12),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      text = element_text(size = 8),
      legend.position = "none")

4.4 Functional ordination - interactive

library("plotly")
scale <- 20 # scale for vector loadings

pcoa_interactive <- 
  gift_pcoa_vectors %>% 
  rownames_to_column(var = "genome") %>% 
  left_join(genome_stats, by = "genome") %>%
  left_join(genome_taxonomy, by = "genome") %>%
  mutate(tooltip = paste(family, "//", genome)) %>%
  ggplot() +
      # genome positions
      scale_color_manual(values = order_colors)+
      geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length, text = tooltip), 
                 alpha = 0.9, shape = 16) +
      scale_size_continuous(range = c(0.05,4)) +
     # primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)")) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)")) +
    theme_minimal() + 
    theme(legend.position = "none")

ggplotly(pcoa_interactive, tooltip = "text")

4.5 Compare MCI and genome length between taxonomic groups

The resulting plot corresponds to Supplementary Figure S1.

main_phylums <- c("Bacillota_A", "Bacillota", "Bacteroidota",
                  "Cyanobacteriota","Pseudomonadota", "Actinomycetota",
                  "Verrucomicrobiota", "Campylobacterota"
                  )

selected_taxonomy_colors <- 
  taxonomy_colors %>% 
  filter(phylum %in% main_phylums)

selected_orders <- 
  selected_taxonomy_colors %>% 
  select(order, color_order) %>% 
  filter(order != "Gastranaerophilales") %>% 
  filter(order != "Actinomycetales") %>% 
  filter(order != "Coriobacteriales")

selected_groups <- 
  selected_taxonomy_colors %>% 
  select(phylum, color_phylum) %>% 
  distinct(phylum, color_phylum) %>%
  filter(color_phylum != '#c28c5c') %>% 
  rename(order = 'phylum') %>% 
  rename(color_order = 'color_phylum') %>% 
  bind_rows(., selected_orders) %>% 
  mutate(order = factor(order, levels = c("Campylobacterota", 
                                          "Verrucomicrobiota",
                                          "Actinomycetota",
                                          "Enterobacterales",     
                                          "RF32",                 
                                          "Burkholderiales",      
                                          "Rs-D84",              
                                          "Pseudomonadota",  
                                          "Gastranaerophilales",
                                          "Cyanobacteriota", 
                                          "Bacteroidales",        
                                          "Flavobacteriales",
                                          "Bacteroidota", #
                                          "RF39",                 
                                          "Lactobacillales",      
                                          "ML615J-28",           
                                          "Erysipelotrichales",   
                                          "Acholeplasmatales",    
                                          "Bacillales",           
                                          "RFN20",
                                          "Bacillota",            
                                          "Oscillospirales",      
                                          "TANB77",               
                                          "Christensenellales",   
                                          "Lachnospirales",      
                                          "Clostridiales",        
                                          "Monoglobales_A",       
                                          "Peptostreptococcales", 
                                          "Monoglobales",        
                                          "UBA1212",
                                          "Bacillota_A"))) %>% 
  arrange(order)
genome_taxonomy <- 
  genome_taxonomy %>% 
  mutate(avg_mci = rowMeans(gifts_elements)) %>% 
  mutate(length = genome_stats$mag_length)

table_one <- 
  genome_taxonomy %>% 
  select(genome, order, avg_mci)
  
table_two <- 
  genome_taxonomy %>% 
  select(genome, phylum, avg_mci) %>%
  rename(order = 'phylum') %>% 
  bind_rows(., table_one) %>% 
  rename(group = 'order')

border_groups <- c("Bacillota_A", "Bacillota", "Bacteroidota",
                   "Cyanobacteriota", "Pseudomonadota", "Actinomycetota",
                   "Verrucomicrobiota", "Campylobacterota")

4.5.1 Average MCI by taxonomic phylum and order

table_two %>% 
  filter(group %in% selected_groups$order) %>% 
  mutate(group = factor(group, levels = selected_groups$order)) %>% 
  ggplot() +
  geom_jitter(aes(x = group, y = avg_mci), 
              color = 'grey', alpha = 0.5, width = 0.1) +
  geom_violin(aes(x = group, y = avg_mci, fill = group),
              color = NA,
              size = 0.01) +
  theme_minimal() +
  xlab("Taxonomic group") +
  ylab("Genome average metabolic capacity") +
  scale_fill_manual(values = selected_taxonomy_colors$color_order) +
  theme(legend.position = 'none',
        ) +
  coord_flip()

4.5.2 Genome length by taxonomic phylum and order

table_three <- 
  genome_taxonomy %>% 
  select(genome, order, length)

table_four <- 
  genome_taxonomy %>% 
  select(genome, phylum, length) %>%
  rename(order = 'phylum') %>% 
  bind_rows(., table_three) %>% 
  rename(group = 'order')

table_four %>% 
  filter(group %in% selected_groups$order) %>% 
  mutate(group = factor(group, levels = selected_groups$order)) %>% 
  ggplot() +
  geom_jitter(aes(x = group, y = length), color = 'grey', alpha = 0.5, width = 0.1) +
  geom_violin(aes(x = group, y = length, fill = group),
              color = NA,
              size = 0.01) +
  theme_minimal() +
  xlab("Taxonomic group") +
  ylab("Mag length") +
  scale_fill_manual(values = selected_taxonomy_colors$color_order) +
  theme(legend.position = 'none') +
  coord_flip()

rm(list = ls())

5 Alpha diversity

5.1 Load MG data

load("data/data_mg.Rdata")

5.2 Hill numbers - Alpha diversity

# Neutral
neutral <-
  mag_weighted %>% 
  dplyr::select(where(~ !all(. == 0))) %>% 
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "animal_code")

# Phylogenetic
phylogenetic <-
  mag_weighted %>% 
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "animal_code")

# Functional KEGG
dist_kegg <-
  genome_kegg_paths %>%
  column_to_rownames(var = "genome") %>%
  traits2dist(., method = "gower")

functional_kegg <-
  mag_weighted %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1 , dis = dist_kegg) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional_kegg = 1) %>%
  rownames_to_column(var = "animal_code") %>%
  mutate(functional_kegg = if_else(is.nan(functional_kegg), 1, functional_kegg))

# Sequence depth
log_seq_depth <-
  read_counts %>%
  column_to_rownames(var = 'genome') %>%  
  {as.data.frame(log(colSums(.)))} %>%
  rename(seq_depth = 'log(colSums(.))') %>%
  rownames_to_column(var = 'animal_code')

# Merge all metrics
alpha_div <-
  neutral %>%
  full_join(phylogenetic, by = 'animal_code') %>%
  full_join(functional_kegg, by = 'animal_code') %>%
  left_join(chicken_metadata, by = 'animal_code') %>%
  left_join(log_seq_depth, by = 'animal_code')
save(alpha_div, file = "data/alpha_div.Rdata")

5.2.1 Alpha diversity by sampling day

The resulting plot corresponds to Figure 2a.

load("data/alpha_div.Rdata")

# Neutral
p_n <- 
  ggplot(alpha_div, 
         aes(x = sampling_time, y = neutral, group = sampling_time)) +
  geom_jitter(aes_string(color = 'sampling_time'),
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  theme_minimal() +
  ylab(NULL) +
  xlab(NULL) +
  ggtitle("Neutral") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = "none")

# Phylogenetic
p_p <- 
  ggplot(alpha_div, 
         aes(x = sampling_time, y = phylogenetic, group = sampling_time)) +
  geom_jitter(aes_string(colour = 'sampling_time'), 
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  theme_minimal() +
  labs(x = NULL, y = NULL) +
  ggtitle("Phylogenetic") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = 'none')

# Functional KEGGs
p_f_kegg <-
  ggplot(alpha_div,
         aes(x = sampling_time, y = functional_kegg, group = sampling_time)) +
  geom_jitter(aes_string(colour = 'sampling_time'), 
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
  scale_color_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
  theme_minimal() +
  labs(x =NULL, y = NULL) +
  ggtitle("Functional") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = "none")

grid.arrange(p_n, p_p, p_f_kegg, ncol = 3)

5.3 Temporal development

The resulting tables correspond to Supplementary Table S2.

5.3.1 Neutral

m_div_n <-
  lme(neutral ~ seq_depth + sex + breed + treatment + trial * age,
      random = ~1|pen,
      data = alpha_div)

anova(m_div_n)
            numDF denDF   F-value p-value
(Intercept)     1   337 2131.3662  <.0001
seq_depth       1   337    4.1298  0.0429
sex             1    42    1.7688  0.1907
breed           1    42    0.2047  0.6533
treatment       2    42    1.4325  0.2501
trial           1    42    2.1839  0.1469
age             1   337   44.1644  <.0001
trial:age       1   337    0.7599  0.3840

5.3.2 Phylogenetic

m_div_p <-
  lme(phylogenetic ~ seq_depth + sex + breed + treatment + trial * age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_p)
            numDF denDF   F-value p-value
(Intercept)     1   337 22644.853  <.0001
seq_depth       1   337     0.413  0.5210
sex             1    42     4.093  0.0495
breed           1    42    11.799  0.0013
treatment       2    42     1.168  0.3210
trial           1    42     3.901  0.0549
age             1   337  1185.345  <.0001
trial:age       1   337     9.772  0.0019

5.3.3 Functional KEGG

m_div_f_kegg <-
  lme(functional_kegg ~ seq_depth + sex + breed + treatment + trial * age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_f_kegg)
            numDF denDF   F-value p-value
(Intercept)     1   337 307927.81  <.0001
seq_depth       1   337      0.20  0.6535
sex             1    42      0.79  0.3791
breed           1    42      3.00  0.0907
treatment       2    42      1.23  0.3016
trial           1    42      2.52  0.1203
age             1   337    406.41  <.0001
trial:age       1   337      1.17  0.2799
rm(list = ls())

6 Bacterial community composition

6.1 Load MG data

load("data/data_mg.Rdata")

6.2 Hill numbers - beta diversity

# Neutral
beta_q1n <- 
  mag_weighted %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  dplyr::select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, metric = "S", out = "pair")

# Phylogenetic
beta_q1p <- 
  mag_weighted %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  dplyr::select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, tree = genome_tree, metric = "S", out = "pair")

# Functional KEGG
dist_kegg <-
  genome_kegg_paths %>%
  column_to_rownames(var = "genome") %>%
  traits2dist(., method = "gower")

beta_q1f_kegg <-
  mag_weighted %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  dplyr::select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, dist = dist_kegg, metric = "S", out = "pair")
save(beta_q1n, 
     beta_q1p, 
     beta_q1f_gifts,
     file = "data/beta_div.Rdata")

6.2.1 Compare individuals from same trial and pen between sampling days

The resulting stats correspond to Supplementary Table S3.

load("data/beta_div.Rdata")

metadata_2 <-
  chicken_metadata %>%
  dplyr::rename(second = 'animal_code',
                sampling_time_2 = 'sampling_time',
                pen_2 = 'pen',
                trial_2 = 'trial')

# Neutral 
betadiv_n_dis <-
  beta_q1n %>% 
  rename(animal_code = 'first') %>%
  left_join(chicken_metadata, by = 'animal_code') %>%
  left_join(metadata_2, by = 'second') %>%
  mutate(diff = case_when(
    sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
    sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
    sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
  mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
                                trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
  mutate(diff_pen = case_when(pen == pen_2  ~ 'same')) %>%
  drop_na(diff, diff_trial, diff_pen)

# Phylogenetic
betadiv_p_dis <-
  beta_q1p %>% 
  rename(animal_code = 'first') %>%
  left_join(chicken_metadata, by = 'animal_code') %>%
  left_join(metadata_2, by = 'second') %>%
  mutate(diff = case_when(
    sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
    sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
    sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
  mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
                                trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
  mutate(diff_pen = case_when(pen == pen_2  ~ 'same')) %>%
  drop_na(diff, diff_trial, diff_pen)

# Functional KEGG
betadiv_f_dis_kegg <-
  beta_q1f_kegg %>%
  rename(animal_code = 'first') %>%
  left_join(chicken_metadata, by = 'animal_code') %>%
  left_join(metadata_2, by = 'second') %>%
  mutate(diff = case_when(
    sampling_time == '7' & sampling_time_2 == '21' ~ '7_21',
    sampling_time == '21' & sampling_time_2 == '35' ~ '21_35',
    sampling_time == '7' & sampling_time_2 == '35' ~ '7_35')) %>%
  mutate(diff_trial = case_when(trial == 'CA' & trial_2 == 'CA' ~ 'CA',
                                trial == 'CB' & trial_2 == 'CB' ~ 'CB')) %>%
  mutate(diff_pen = case_when(pen == pen_2  ~ 'same')) %>%
  drop_na(diff, diff_trial, diff_pen)
betadiv_n_dis %>%
  group_by(diff) %>%
  summarise(mean = mean(S), sd = sd(S))
# A tibble: 3 x 3
  diff   mean     sd
  <chr> <dbl>  <dbl>
1 21_35 0.458 0.100 
2 7_21  0.542 0.0907
3 7_35  0.597 0.0917
betadiv_p_dis %>%
  group_by(diff) %>%
  summarise(mean = mean(S), sd = sd(S))
# A tibble: 3 x 3
  diff   mean     sd
  <chr> <dbl>  <dbl>
1 21_35 0.168 0.0776
2 7_21  0.238 0.0883
3 7_35  0.297 0.103 
betadiv_f_dis_kegg %>%
  group_by(diff) %>%
  summarise(mean = mean(S), sd = sd(S))
# A tibble: 3 x 3
  diff   mean    sd
  <chr> <dbl> <dbl>
1 21_35 0.205 0.246
2 7_21  0.252 0.291
3 7_35  0.227 0.262

6.2.2 Plot difference between sampling points for beta diversity

The resulting plot corresponds to Supplementary Figure S3.

# Neutral
p_n <- 
  betadiv_n_dis %>%
  filter(!diff == '7_35') %>%
  mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
  ggplot(aes(x = diff, y = S, fill = diff)) +
  geom_boxplot() +
  ylim(0,0.5) +
  theme_minimal() +
  theme(legend.position = 'none') +
  ggtitle('Neutral')

# Phylogenetic
p_p <- 
  betadiv_p_dis %>%
  filter(!diff == '7_35') %>%
  mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
  ggplot(aes(x = diff, y = S, fill = diff)) +
  geom_boxplot() +
  ylim(0,0.5) +
  theme_minimal() +
  theme(legend.position = 'none') +
  ggtitle('Phylogenetic')

# Functional KEGG
p_f_kegg <-
  betadiv_f_dis_kegg %>%
  filter(!diff == '7_35') %>%
  mutate(diff = factor(diff, levels = c('7_21', '21_35'))) %>%
  ggplot(aes(x = diff, y = S, fill = diff)) +
  geom_boxplot() +
  ylim(0,0.5) +
  theme_minimal() +
  theme(legend.position = 'none') +
  ggtitle('Functional')


grid.arrange(p_n, p_p, p_f_kegg, ncol = 3)

6.3 Effect of design in microbiome composition

6.3.1 Permanova values for different metrics of beta diversity

The resulting stats correspond to Supplementary Table S3.

# Dissimilarity tables
perm <- how(nperm = 999)
setBlocks(perm) <- with(chicken_metadata, pen)

# Neutral
b1n_dis_table <- 
  beta_q1n %>%
  bind_rows(beta_q1n %>% rename(first = second, second = first)) %>% 
  pivot_wider(names_from = second, values_from = S) %>%
  column_to_rownames('first') %>%
  mutate(across(everything(), ~replace(., is.na(.), 0))) %>% 
  as.dist()

adonis2(b1n_dis_table ~ trial + sex + breed + treatment + trial + trial * age,
        permutations = perm,
        data = chicken_metadata,
        by = "term")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks:  with(chicken_metadata, pen) 
Permutation: free
Number of permutations: 999

adonis2(formula = b1n_dis_table ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
           Df SumOfSqs      R2       F Pr(>F)    
trial       1    2.860 0.05468 26.0870  0.001 ***
sex         1    0.492 0.00941  4.4878  0.001 ***
breed       1    0.905 0.01731  8.2583  0.001 ***
treatment   2    0.857 0.01638  3.9066  0.001 ***
age         1    4.137 0.07909 37.7309  0.001 ***
trial:age   1    1.393 0.02663 12.7057  0.001 ***
Residual  380   41.665 0.79651                   
Total     387   52.310 1.00000                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Phylogenetic
b1p_dis_table <- 
  beta_q1p %>%
  bind_rows(beta_q1n %>% rename(first = second, second = first)) %>% 
  pivot_wider(names_from = second, values_from = S) %>%
  column_to_rownames('first') %>%
  mutate(across(everything(), ~replace(., is.na(.), 0))) %>% 
  as.dist()

adonis2(b1p_dis_table ~ trial + sex + breed + treatment + trial + trial * age,
        permutations = perm,
        data = chicken_metadata,
        by = "term")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks:  with(chicken_metadata, pen) 
Permutation: free
Number of permutations: 999

adonis2(formula = b1p_dis_table ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
           Df SumOfSqs      R2       F Pr(>F)    
trial       1    2.860 0.05468 26.0870  0.001 ***
sex         1    0.492 0.00941  4.4878  0.001 ***
breed       1    0.905 0.01731  8.2583  0.001 ***
treatment   2    0.857 0.01638  3.9066  0.001 ***
age         1    4.137 0.07909 37.7309  0.001 ***
trial:age   1    1.393 0.02663 12.7057  0.001 ***
Residual  380   41.665 0.79651                   
Total     387   52.310 1.00000                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Functional KEGG
b1f_dis_table_kegg <-
  beta_q1f_kegg %>%
  bind_rows(beta_q1n %>% rename(first = second, second = first)) %>%
  pivot_wider(names_from = second, values_from = S) %>%
  column_to_rownames('first') %>%
  mutate(across(everything(), ~replace(., is.na(.), 0))) %>%
  as.dist()

adonis2(b1f_dis_table_kegg ~ trial + sex + breed + treatment + trial + trial * age,
        permutations = perm,
        data = chicken_metadata,
        by = "term")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks:  with(chicken_metadata, pen) 
Permutation: free
Number of permutations: 999

adonis2(formula = b1f_dis_table_kegg ~ trial + sex + breed + treatment + trial + trial * age, data = chicken_metadata, permutations = perm, by = "term")
           Df SumOfSqs      R2       F Pr(>F)    
trial       1    2.860 0.05468 26.0870  0.001 ***
sex         1    0.492 0.00941  4.4878  0.001 ***
breed       1    0.905 0.01731  8.2583  0.001 ***
treatment   2    0.857 0.01638  3.9066  0.001 ***
age         1    4.137 0.07909 37.7309  0.001 ***
trial:age   1    1.393 0.02663 12.7057  0.001 ***
Residual  380   41.665 0.79651                   
Total     387   52.310 1.00000                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.3.2 Community composition

perm <- how(nperm = 999)
setBlocks(perm) <- with(chicken_metadata, pen)

t_hel <- 
 hel %>% 
  t() %>% 
  as.data.frame()
  
adonis2(t_hel ~ trial * sampling_time + 
              breed * sampling_time +
              sex * sampling_time +
              treatment * sampling_time,
        permutations = perm,
        data = chicken_metadata,
        by = "term")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks:  with(chicken_metadata, pen) 
Permutation: free
Number of permutations: 999

adonis2(formula = t_hel ~ trial * sampling_time + breed * sampling_time + sex * sampling_time + treatment * sampling_time, data = chicken_metadata, permutations = perm, by = "term")
                         Df SumOfSqs      R2       F Pr(>F)    
trial                     1   0.7573 0.02939 16.6760  0.001 ***
sampling_time             2   6.3129 0.24504 69.5085  0.001 ***
breed                     1   0.2590 0.01005  5.7035  0.001 ***
sex                       1   0.0971 0.00377  2.1387  0.001 ***
treatment                 2   0.1423 0.00552  1.5664  0.001 ***
trial:sampling_time       2   0.8781 0.03408  9.6680  0.001 ***
sampling_time:breed       2   0.1629 0.00632  1.7937  0.026 *  
sampling_time:sex         2   0.1321 0.00513  1.4548  0.086 .  
sampling_time:treatment   4   0.2188 0.00849  1.2047  0.174    
Residual                370  16.8020 0.65219                   
Total                   387  25.7625 1.00000                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(t_hel ~ trial * sampling_time + breed + sex + treatment,
        permutations = perm,
        data = chicken_metadata,
        by = "term")
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Blocks:  with(chicken_metadata, pen) 
Permutation: free
Number of permutations: 999

adonis2(formula = t_hel ~ trial * sampling_time + breed + sex + treatment, data = chicken_metadata, permutations = perm, by = "term")
                     Df SumOfSqs      R2       F Pr(>F)    
trial                 1   0.7573 0.02939 16.5310  0.001 ***
sampling_time         2   6.3129 0.24504 68.9041  0.001 ***
breed                 1   0.2590 0.01005  5.6539  0.001 ***
sex                   1   0.0971 0.00377  2.1201  0.001 ***
treatment             2   0.1423 0.00552  1.5528  0.001 ***
trial:sampling_time   2   0.8781 0.03408  9.5839  0.001 ***
Residual            378  17.3159 0.67214                   
Total               387  25.7625 1.00000                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.4 Community composition development

6.4.1 Distance-based RDA

The resulting plot corresponds to Figure 2b in the manuscript.

set.seed(4)
hel_bray <- vegdist(t(hel), method = 'bray')
hel_bray_sqrt <- sqrt(hel_bray)

pcoa <- cmdscale(hel_bray_sqrt, k = ncol(hel) - 1, eig = TRUE)
pcoa_scores <- pcoa$points

db_rda <- rda(pcoa_scores ~ sampling_time, data = chicken_metadata)

# Stats
anova(db_rda, by = 'term')
Permutation test for rda under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999

Model: rda(formula = pcoa_scores ~ sampling_time, data = chicken_metadata)
               Df Variance      F Pr(>F)    
sampling_time   2 0.022776 27.979  0.001 ***
Residual      385 0.156701                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RsquareAdj(db_rda)
$r.squared
[1] 0.1269005

$adj.r.squared
[1] 0.1223649
db_rda_scores <-
  data.frame(
    scores(db_rda, display = 'wa'),
    pen = chicken_metadata$pen,
    time = chicken_metadata$sampling_time
    )

db_rda_scores %>%
  ggplot(aes(x = RDA1, y = RDA2, colour = time)) +
  geom_point(size = 3) +
  scale_color_manual(values = c('#e6a024', '#cc6777', '#5bb4e5')) +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        axis.title = element_text(size = 8),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank())

rm(list = ls())

8 Temporal HMSC setup

HMSC models are computationally intensive. It is advisable to run this script on a server. The code is made to be reproduced on any supercomputer.

8.1 Load MG data

8.1.1 Define directories

# Create directories
dir.create("temporal_hmsc")
dir.create("temporal_hmsc/model_fit")
dir.create("temporal_hmsc/models")
dir.create("temporal_hmsc/panels")

# Define paths
localDir = "."
ModelDir = file.path(localDir, "temporal_hmsc/models")
PanelDir = file.path(localDir, "temporal_hmsc/panels")

8.1.2 Generate input objects

# Load data
load("data/data_mg.Rdata")

# Metadata summary
design <- 
  chicken_metadata[, c('animal_code', 'trial', 'pen', 'age','breed',
                       'sex', 'treatment', 'chicken_body_weight')] %>% 
  column_to_rownames("animal_code")

design$log_seq_depth <- log(colSums(read_counts %>% column_to_rownames("genome")))
design$animal_code <- rownames(design)

for (i in 1:ncol(design)) {
  if (is.character(design[,i])) {
    design[,i] <- factor(design[,i])
  }
}

8.2 Prepate data for HMSC

8.2.1 Define tables

# Genome count table (quantitative community data)
YData <- log1p(mag_weighted %>% 
                 t() %>% 
                 as.data.frame())

# Fixed effects data (explanatory variables)
XData <- design
mean(rownames(YData) == rownames(XData))  # Ydata and XData in the same order

# Genome phylogeny
PData <- genome_tree

YData <- YData[,colnames(YData) %in% genome_tree$tip.label]  # Filter missing MAGs in tree

8.3 Define formula

# Define XFormula
XFormula_Time <- ~log_seq_depth + trial + sex + breed + treatment + age

# Study Design
StudyDesign <- design[,c(2,9)]
rL.Pen <- HmscRandomLevel(units = levels(StudyDesign$pen))

8.3.1 Define HMSC model

m <- Hmsc(Y = YData,
                XData = XData,
                XFormula = XFormula_Time,
                studyDesign = StudyDesign,
                ranLevels = list("pen" = rL.Pen),
                phyloTree = genome_tree,
                distr = "normal",
                YScale = TRUE)

save(m, file = file.path(ModelDir,"unfitted_model.Rdata"))

8.3.2 Define MCMC characteristics

## How often to sample the MCM
samples_list = c(5, 250, 250)

# The number of MCMC steps between each recording sample
thin_list = c(1,1,10)
nst = length(thin_list)

# The number of MCMC chains ot use
nChains = 4

# Load only the unfitted models
load(file.path(ModelDir, "unfitted_model.Rdata"))
unf_model <- m

8.4 Fit model

set.seed(1)

for (Lst in 1:length(samples_list)) {
  thin = thin_list[Lst]
  samples = samples_list[Lst]
  # Fit the model
  m = sampleMcmc(unf_model,
                 samples = samples,
                 thin = thin,
                 adaptNf = rep(ceiling(0.4*samples*thin), unf_model$nr),
                 transient = ceiling(0.5*samples*thin),
                 nChains = nChains,
                 nParallel = nChains)
  # Create file name
  filename = paste("temporal_model",
                   "_thin_", as.character(thin),
                   "_samples_", as.character(samples),
                   "_chains_", as.character(nChains),
                   ".Rdata", sep = "")
  # Save file
  save(m, file = file.path(ModelDir, filename)) 
}

8.5 Evaluate convergence

Figure not added to Supplementary.

set.seed(1)

for (i in 1) {
  ma = NULL
  na = NULL
  ma_rho = NULL
  na_rho = NULL
  for (Lst in 1:nst) {
    thin = thin_list[Lst]
    samples = samples_list[Lst]
    filename = paste("temporal_model","_thin_", as.character(thin),
                     "_samples_", as.character(samples),
                     "_chains_", as.character(nChains),
                     ".Rdata", sep = "")
    load(file = file.path(ModelDir, filename))
    mpost = convertToCodaObject(m,
                                spNamesNumbers = c(T,F),
                                covNamesNumbers = c(T,F))
    
    ## Beta - Species niches - response of MAGs to the fixed effects
    psrf.beta = gelman.diag(mpost$Beta,multivariate = FALSE)$psrf
    
    ## Rho - Influence of phylogeny on species niches - phylogenetic signal
    psrf.rho = gelman.diag(mpost$Rho,multivariate = FALSE)$psrf
    
    ## Beta
    if (is.null(ma)) {
      ma = psrf.beta[,1]
      na = paste0("temporal_model_", as.character(thin),
                  ",", as.character(samples))
    } else {
      ma = cbind(ma,psrf.beta[,1])
      na = c(na,paste0("temporal_model_", as.character(thin),
                       ",", as.character(samples)))
    }
    ## Rho
    if (is.null(ma_rho)) {
      ma_rho = psrf.rho[,1]
      na_rho = paste0("temporal_model_", as.character(thin),
                      ",", as.character(samples))
    } else {
      ma_rho = cbind(ma_rho,psrf.rho[,1])
      na_rho = c(na_rho,paste0("temporal_model_", as.character(thin),
                               ",", as.character(samples)))
    }
  }
  
  # Create file name
  panel.name = paste("MCMC_convergence",
                     "temporal_model_", as.character(i),
                     ".pdf", sep = "")
  # Plot
  pdf(file = file.path(PanelDir, panel.name))
  
  ## Beta
  par(mfrow = c(2,1))
  vioplot(ma,
          col = rainbow_hcl(1),
          names = na,
          ylim = c(0, max(ma)),
          main = "psrf(beta)")
  vioplot(ma,
          col = rainbow_hcl(1),
          names = na,
          ylim = c(0.9,1.1),
          main = "psrf(beta)")

  ## Rho
  par(mfrow = c(2,1))
  vioplot(ma_rho,
          col = rainbow_hcl(1),
          names = na_rho,
          ylim = c(0, max(ma_rho)),
          main = "psrf(rho)")
  vioplot(ma_rho,
          col = rainbow_hcl(1),
          names = na_rho,
          ylim = c(0.9,1.1),
          main = "psrf(rho)")
  dev.off()
}
rm(list = ls())

9 HMSC analysis

9.1 Load model

# Directory
localDir = "."
ModelDir = file.path(localDir, "temporal_hmsc/models")
MFDir = file.path(localDir, "temporal_hmsc/model_fit")

# Load model
if(!file.exists("temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata")){
  download.file(
    url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/temporal_model_thin_10_samples_250_chains_4.Rdata',
    destfile = 'temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata', method = 'curl'
    )
}

load(file = file.path(ModelDir, 'temporal_model_thin_10_samples_250_chains_4.Rdata'))

9.2 Cross-validation

The cross-validation part is also computationally intense. Make sure you have the right capacity for it.

# Create partitions
set.seed(12)
partition_1 <- createPartition(hM = m, nfolds = 2)
partition_2 <- c(rep(1,sum(m$XData$trial == "CA")),
                 rep(2,sum(m$XData$trial == "CB")))

set.seed(1)

predY.MF <- computePredictedValues(m, expected = TRUE)
MF <- evaluateModelFit(hM = m, predY = predY.MF)

filename <- file.path(MFDir, paste("MF_chains_4_thin_10_samples_250.rds"))
saveRDS(MF, file = filename)

MF <- readRDS(file = filename)
mean(MF$R2)

9.2.1 Model fit using 2-fold CV: samples randomly assigned to folds

set.seed(1)
predY.CV_1 <- computePredictedValues(m,
                                     expected = TRUE,
                                     partition = partition_1,
                                     nChains = 1,
                                     nParallel = 1)

MF.CV_1 <- evaluateModelFit(hM = m, predY = predY.CV_1)

# create file name and save
filename <- file.path(MFDir, paste("MF_CV1_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_1,file = filename)

MF_CV_1 <- readRDS(file = filename)
mean(MF_CV_1$R2)

9.2.2 Model fit using 2-fold CV: each fold contains the samples from one trial

set.seed(1)
predY.CV_2 <- computePredictedValues(m,
                                     expected = TRUE,
                                     partition = partition_2,
                                     nChains = 1,
                                     nParallel = 1)

MF.CV_2 <- evaluateModelFit(hM = m, predY = predY.CV_2)

# create file name and save
filename <- file.path(MFDir, paste("MF_CV2_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_2, file = filename)

MF_CV_2 <- readRDS(file = filename)
mean(MF_CV_2$R2)

9.3 Functional structure

beta_post <- getPostEstimate(m, parName = "Beta")
plotBeta(m, beta_post, supportLevel = 0.95, plotTree = TRUE)

Gradient_age <- constructGradient(m,
                                 focalVariable = "age",
                                 non.focalVariables = 1)

predY_age <- predict(m, Gradient = Gradient_age, expected = TRUE)

# Example using cmag_376
plotGradient(m,
             Gradient_age,
             pred = predY_age,
             yshow = 0,
             measure = "Y",
             index = 376,
             showData = TRUE)

9.5 Correlate bacterial MCI and response to time

The resulting plot corresponds to Figure 2e.

load("data/data_mg.Rdata")
trends <- read_tsv("data/hmsc_mag_trend.tsv")
mci_df <-
  gifts_elements %>%
  as.data.frame() %>% 
  mutate(avg_mci = rowMeans(.)) %>% 
  select(avg_mci) %>% 
  rownames_to_column(var = 'genome')

 
trends %>% 
  left_join(genome_taxonomy %>% select(genome, order), by = 'genome') %>% 
  left_join(mci_df, by = 'genome') %>% 
  ggplot() +
  geom_point(aes(x = parameter, y = avg_mci, color = order), size = 1) +
  scale_color_manual(values = order_colors) +
  geom_smooth(aes(x = parameter, y = avg_mci),
              method = 'lm', linewidth = 0.3) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        legend.position = 'none')

9.6 Variance partitioning

The resulting results correspond to Supplementary Table S4.

VP <- computeVariancePartitioning(m)
plotVariancePartitioning(hM = m, VP = VP)

9.7 Phylogenetic correlogram

The resulting plot corresponds to Figure 2d.

9.7.1 Calcultate phylogenetic distance

pw_phylo_dist <- cophenetic.phylo(genome_tree)
xy <- t(combn(colnames(pw_phylo_dist), 2))
pw_phylo_dist <- data.frame(xy, dist = pw_phylo_dist[xy])
colnames(pw_phylo_dist) <- c('genome.x', 'genome.y', 'distance')

pw_phylo_dist_taxa <-
  pw_phylo_dist %>%
  inner_join(genome_taxonomy, by = c('genome.x' = 'genome')) %>%
  inner_join(genome_taxonomy, by = c('genome.y' = 'genome'))


# Create distance table
tax_table <- c()
for (i in c(1:nrow(pw_phylo_dist_taxa))) {
  pair <- pw_phylo_dist_taxa[i,]
  #Phylum level
  if (!is.na(pair$phylum.x != pair$phylum.y) & pair$phylum.x != pair$phylum.y) {
    row <- c('Phylum', pair$distance)
  } else {
    #Class level
    if (!is.na(pair$class.x != pair$class.y) & pair$class.x != pair$class.y) {
      row <- c('Class', pair$distance)
    } else {
      #Order level
      if (!is.na(pair$order.x != pair$order.y) & pair$order.x != pair$order.y) {
        row <- c('Order', pair$distance)
      } else {
        #Family level
        if (!is.na(pair$family.x != pair$family.y) & pair$family.x != pair$family.y) {
          row <- c('Family', pair$distance)
        } else {
          # Genus
          if (!is.na(pair$genus.x != pair$genus.y) & pair$genus.x != pair$genus.y) {
            row <- c('Genus', pair$distance)
          }
        }
      }
    }
  }
  tax_table <- rbind(tax_table,row)
}

tax_table <-
  tax_table %>%
  data.frame() %>%
  dplyr::rename(taxonomy = 'X1', distance = 'X2') %>%
  mutate(distance = as.numeric(distance)) %>%
  write_tsv("data/taxonomy_distance.tsv")
tax_table <- read_tsv("data/taxonomy_distance.tsv")

tax_table %>%
ggplot(aes(x = distance,
           fill = taxonomy,
           color = taxonomy,
           alpha = 0.1
           )) +
  geom_density(adjust = 5) +
  scale_fill_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
  scale_color_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
  theme_void()

9.7.2 Phylogenetic signal

mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))

age_parameter <- beta_post$mean[8,]
age_parameter_phyloSorted <-
  data.frame(age_parameter = age_parameter[
    match(m$phyloTree$tip.label,
          names(age_parameter))
    ])
mean(rownames(age_parameter_phyloSorted) == m$phyloTree$tip.label)

new_tree <- m$phyloTree
new_tree$node.label <- NULL

obj <- phylo4d(new_tree, tip.data = age_parameter_phyloSorted)
barplot.phylo4d(obj)

age.cg <- phyloCorrelogram(obj, trait = "age_parameter")
saveRDS(age.cg,file = "age.cg.rds")
age.cg <- readRDS(file = "age.cg.rds")

plot(age.cg)
rm(list = ls())

10 Bacterial community composition

10.1 Load MG data

load("data/data_mg.Rdata")

10.2 GIFTs per MAG

10.2.1 Plot GIFTs of all bacteria by element

This plot is not included in Supplementary

gifts_elements %>%
  as.data.frame() %>% 
  rownames_to_column(var = 'genome') %>% 
  pivot_longer(!genome, names_to = 'Code_element', values_to = 'mci') %>%
  inner_join(GIFT_db2, by = 'Code_element') %>%
  ggplot(aes(x = genome,
             y = Code_element,
             fill = mci)) +
  geom_tile(color = '#ffffff') +
  scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  facet_grid(Code_function ~ ., scales = 'free', space = 'free') +
  scale_fill_gradientn(colours = rev(terrain.colors(10))) +
  theme_void(base_size = 18) +
  theme(axis.text.y = element_text(),
        legend.position = 'top')

10.3 GIFTs per community

10.3.1 Calculate community GIFTs

elements_com <- to.community(gifts_elements, sweep(hel, 2, colSums(hel), FUN = "/"), GIFT_db2) 

funcs_com <- to.community(gifts_functions, sweep(hel, 2, colSums(hel), FUN = "/"), GIFT_db2)

domains_com <- to.community(gifts_domains, sweep(hel, 2, colSums(hel), FUN = "/"), GIFT_db2)
save(elements_com,
     funcs_com,
     domains_com,
     file = "data/mci_com.Rdata")

10.3.2 Plot community MCI

The resulting plot corresponds to Figure 2f in the manuscript.

load("data/mci_com.Rdata")

funcs_com %>%
  as.data.frame() %>% 
  rownames_to_column('animal_code') %>%
  rowwise() %>%
  mutate(overall = mean(c_across(B01:D09))) %>%
  select(animal_code, overall) %>%
  left_join(chicken_metadata %>%
            select(animal_code, sampling_time),
            by = 'animal_code') %>%
  ggplot(aes(x = sampling_time,
             y = overall,
             group = sampling_time)) +
  geom_jitter(aes_string(colour = 'sampling_time'), size = 0.3, width = 0.2, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'), width = 0.4, alpha = 0.5, outlier.color = NA) +
  scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = 'none')

rm(list = ls())

11 Associate microbial community metrics with chicken body weight

11.1 Load MG and MT data

load("data/data_mg.Rdata")
load("data/alpha_div.Rdata")
load("data/mci_com.Rdata")

11.2 Associate alpha diversity metrics with chicken BW

The resulting plots are not included in Supplementary.

11.2.1 Neutral

ggplot(alpha_div, aes(x = neutral, y = chicken_body_weight)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sampling_time*trial, scales = 'free')

11.3 Phylogenetic

ggplot(alpha_div, aes(x = phylogenetic, y = chicken_body_weight)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sampling_time * trial, scales = 'free')

11.4 Functional KEGG

ggplot(alpha_div, aes(x = functional_kegg, y = chicken_body_weight)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sampling_time * trial, scales = 'free')

11.5 Linear regressions

11.5.1 Neutral

The resulting plot corresponds to Figure 3a.

div_all_day_35 <-
  alpha_div %>%
  filter(sampling_time == '35') %>%
  filter(trial != 'CC') #%>% 
  # filter(!animal_code %in% c("CB12.17", "CA04.16", "CA03.17")) # ¿? double check if we are OK filtering some samples, they show out of place diversity

N <- lme(chicken_body_weight ~ age + trial + neutral,
         random = ~1|pen,
         data = div_all_day_35)
summary(N)
Linear mixed-effects model fit by REML
  Data: div_all_day_35 
      AIC      BIC    logLik
  1971.21 1988.686 -979.6049

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:     146.892 267.3176

Fixed effects:  chicken_body_weight ~ age + trial + neutral 
                 Value Std.Error DF   t-value p-value
(Intercept) -2377.5282 1033.8704 90 -2.299638  0.0238
age           136.6093   28.5987 90  4.776764  0.0000
trialCB       -37.8228   63.4256 46 -0.596333  0.5539
neutral        -2.8996    0.8471 90 -3.423055  0.0009
 Correlation: 
        (Intr) age    trilCB
age     -0.995              
trialCB  0.047 -0.091       
neutral -0.108  0.020  0.151

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.9136097 -0.7156661 -0.0462182  0.5659185  2.7314491 

Number of Observations: 140
Number of Groups: 48 
plot_model(N,
           type = 'eff',
           title = 'Neutral diversity',
           terms = 'neutral',
           show.data = TRUE)

11.5.2 Phylogenetic

P <- lme(chicken_body_weight ~ age + trial + phylogenetic,
         random = ~1|pen,
         data = div_all_day_35)
summary(P)
Linear mixed-effects model fit by REML
  Data: div_all_day_35 
       AIC      BIC    logLik
  1971.488 1988.964 -979.7442

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:    133.6237 280.5324

Fixed effects:  chicken_body_weight ~ age + trial + phylogenetic 
                  Value Std.Error DF   t-value p-value
(Intercept)  -2405.5851 1082.1821 90 -2.222902  0.0287
age            142.6677   29.9857 90  4.757858  0.0000
trialCB         10.8886   62.3303 46  0.174691  0.8621
phylogenetic   -64.2194   33.3309 90 -1.926725  0.0572
 Correlation: 
             (Intr) age    trilCB
age          -0.970              
trialCB       0.086 -0.086       
phylogenetic -0.137 -0.104 -0.123

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.8560595 -0.6281856 -0.0497021  0.6532160  2.7098731 

Number of Observations: 140
Number of Groups: 48 
plot_p <-
  plot_model(P,
             type = 'eff',
             title = 'Phylogenetic diversity',
             terms = 'phylogenetic',
             show.data = TRUE)

11.5.3 Functional KEGG

Q_kegg <- lme(chicken_body_weight ~ age + trial + functional_kegg,
         random = ~1|pen,
         data = div_all_day_35)
summary(Q_kegg)
Linear mixed-effects model fit by REML
  Data: div_all_day_35 
       AIC      BIC    logLik
  1968.771 1986.247 -978.3854

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:    142.4688 281.7832

Fixed effects:  chicken_body_weight ~ age + trial + functional_kegg 
                     Value Std.Error DF   t-value p-value
(Intercept)     -3102.4077 1382.9000 90 -2.243407  0.0273
age               135.6025   30.3166 90  4.472873  0.0000
trialCB            -4.7767   63.6887 46 -0.075001  0.9405
functional_kegg   322.1611  726.2162 90  0.443616  0.6584
 Correlation: 
                (Intr) age    trilCB
age             -0.687              
trialCB          0.063 -0.095       
functional_kegg -0.625 -0.137 -0.016

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.00634774 -0.63344629 -0.09338556  0.53395793  2.56270649 

Number of Observations: 140
Number of Groups: 48 
plot_q <-
  plot_model(Q_kegg,
             type = 'eff',
             title = 'Functional diversity',
             terms = 'functional_kegg',
             show.data = TRUE)

11.6 Associate community MCI with diversity

domains_com_2 <-
  funcs_com %>%
  as.data.frame(optional = TRUE) %>% 
  rownames_to_column(var = "animal_code") %>% 
  rowwise() %>%
  mutate(overall_com_mci = mean(c_across(B01:D09))) %>%
  select(animal_code, overall_com_mci) %>%
  left_join(alpha_div) %>%
  filter(sampling_time == '35')

# Neutral
N <- lme(neutral ~ age + trial + overall_com_mci, 
         random = ~1|pen,
         data = domains_com_2)
summary(N)
Linear mixed-effects model fit by REML
  Data: domains_com_2 
       AIC      BIC    logLik
  1320.058 1337.534 -654.0289

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:     14.1856 25.89659

Fixed effects:  neutral ~ age + trial + overall_com_mci 
                    Value Std.Error DF   t-value p-value
(Intercept)      381.3418 121.82183 90  3.130324  0.0024
age               -0.5173   2.76996 90 -0.186768  0.8523
trialCB           -9.2723   6.09225 46 -1.521988  0.1349
overall_com_mci -906.4972 254.91596 90 -3.556063  0.0006
 Correlation: 
                (Intr) age    trilCB
age             -0.807              
trialCB          0.107 -0.093       
overall_com_mci -0.576 -0.017 -0.095

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.3225513 -0.7220818  0.0119940  0.6066901  2.4257749 

Number of Observations: 140
Number of Groups: 48 
# Phylogenetic
P <- lme(phylogenetic ~ age + trial + overall_com_mci,
         random = ~1|pen,
         data = domains_com_2)
summary(P)
Linear mixed-effects model fit by REML
  Data: domains_com_2 
       AIC      BIC    logLik
  262.8678 280.3437 -125.4339

Random effects:
 Formula: ~1 | pen
         (Intercept)  Residual
StdDev: 0.0001588075 0.5894999

Fixed effects:  phylogenetic ~ age + trial + overall_com_mci 
                    Value Std.Error DF    t-value p-value
(Intercept)      19.24326  2.656478 90   7.243901  0.0000
age               0.09722  0.060649 90   1.602937  0.1125
trialCB           0.34837  0.101108 46   3.445529  0.0012
overall_com_mci -52.93094  5.272811 90 -10.038468  0.0000
 Correlation: 
                (Intr) age    trilCB
age             -0.828              
trialCB          0.148 -0.121       
overall_com_mci -0.572  0.015 -0.120

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.54298266 -0.48022804  0.01396494  0.68418701  2.03727482 

Number of Observations: 140
Number of Groups: 48 
# Functional
Q <- lme(functional_kegg ~ age + trial + overall_com_mci,
         random = ~1|pen,
         data = domains_com_2)
summary(Q)
Linear mixed-effects model fit by REML
  Data: domains_com_2 
        AIC       BIC   logLik
  -518.8687 -501.3928 265.4344

Random effects:
 Formula: ~1 | pen
        (Intercept)   Residual
StdDev: 0.004408918 0.03300953

Fixed effects:  functional_kegg ~ age + trial + overall_com_mci 
                     Value  Std.Error DF   t-value p-value
(Intercept)      1.5837707 0.14937823 90 10.602420  0.0000
age              0.0058861 0.00340933 90  1.726471  0.0877
trialCB          0.0043668 0.00581201 46  0.751348  0.4563
overall_com_mci -1.4165522 0.29806837 90 -4.752441  0.0000
 Correlation: 
                (Intr) age    trilCB
age             -0.826              
trialCB          0.144 -0.119       
overall_com_mci -0.572  0.011 -0.117

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.3045841 -0.5938668 -0.1627596  0.4672308  3.9317166 

Number of Observations: 140
Number of Groups: 48 

11.7 Associate community MCI with chicken BW

mci <- lme(chicken_body_weight ~ age + trial + overall_com_mci,
         random = ~1|pen,
         data = domains_com_2)
summary(mci)
Linear mixed-effects model fit by REML
  Data: domains_com_2 
       AIC      BIC    logLik
  1965.926 1983.401 -976.9628

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:    149.5935 279.2234

Fixed effects:  chicken_body_weight ~ age + trial + overall_com_mci 
                     Value Std.Error DF   t-value p-value
(Intercept)     -2265.3072 1312.0446 90 -1.726547  0.0877
age               138.5372   29.8369 90  4.643157  0.0000
trialCB            -0.8276   65.0269 46 -0.012727  0.9899
overall_com_mci -1747.0996 2741.7924 90 -0.637211  0.5256
 Correlation: 
                (Intr) age    trilCB
age             -0.807              
trialCB          0.108 -0.094       
overall_com_mci -0.576 -0.016 -0.095

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.9808054 -0.6163449 -0.1085188  0.5455396  2.5326459 

Number of Observations: 140
Number of Groups: 48 

11.8 Associate community weighted genome size with chicken BW

metadata_day_35 <-
  chicken_metadata %>%
  filter(sampling_time == "35")

mag_lengthed <- 
  round(sweep(total, MARGIN = 1, genome_stats$mag_length, `*`), 0) %>% 
  t() %>% 
  as.data.frame() %>%
  rownames_to_column(var = "animal_code") %>%
  rowwise() %>% 
  mutate(comm_length = mean(c_across(2:389))) %>% 
  select(animal_code, comm_length) %>% 
  filter(animal_code %in% metadata_day_35$animal_code) %>% 
  left_join(metadata_day_35, by = 'animal_code')

L <- lme(chicken_body_weight ~ scale(age) + trial + scale(log(comm_length)),
         random = ~ 1|pen,
         data = mag_lengthed)
summary(L)
Linear mixed-effects model fit by REML
  Data: mag_lengthed 
       AIC      BIC    logLik
  1975.692 1993.168 -981.8459

Random effects:
 Formula: ~1 | pen
        (Intercept) Residual
StdDev:    144.7197 281.2538

Fixed effects:  chicken_body_weight ~ scale(age) + trial + scale(log(comm_length)) 
                            Value Std.Error DF  t-value p-value
(Intercept)             2231.4630  46.49952 90 47.98894  0.0000
scale(age)               114.0616  24.97663 90  4.56673  0.0000
trialCB                   -1.2398  66.94795 46 -0.01852  0.9853
scale(log(comm_length))   -4.9378  29.89947 90 -0.16515  0.8692
 Correlation: 
                        (Intr) scl(g) trilCB
scale(age)               0.088              
trialCB                 -0.728 -0.113       
scale(log(comm_length))  0.211  0.070 -0.291

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.0015452 -0.6185148 -0.1001931  0.5667678  2.5817652 

Number of Observations: 140
Number of Groups: 48 
plot(L)

rm(list = ls())

12 Bacteria vs Chicken BW HMSC setup

HMSC models are computationally intensive. It is advisable to run this script on a server. The code is made to be reproduced on any supercomputer.

12.1 Load MG and MT data

12.1.1 Define directories

# Create directories
dir.create("hmsc_bw")
dir.create("hmsc_bw/model_fit")
dir.create("hmsc_bw/models")
dir.create("hmsc_bw/panels")

# Define paths
localDir = "."
ModelDir = file.path(localDir, "hmsc_bw/models")

12.1.2 Generate input objects

# Load data
load("data/data_mg.Rdata")

# Metadata summary
design <- 
  chicken_metadata[, c('animal_code', 'trial', 'pen', 'age','breed',
               'sex', 'treatment', 'chicken_body_weight')] %>% 
  column_to_rownames("animal_code")

design$log_seq_depth <- log(colSums(read_counts %>% column_to_rownames("genome")))
design$animal_code <- rownames(design)

for (i in 1:ncol(design)) {
  if (is.character(design[,i])) {
    design[,i] <- factor(design[,i])
  }
}

mean(rownames(design) == rownames(mag_weighted))

# Filter day 35
design_day35 <- 
  design %>%
  filter(age > 25)

mag_weighted_day35 <- 
  mag_weighted %>%
  rownames_to_column(var = 'genome') %>% 
  filter(genome %in% rownames(design_day35)) %>% 
  column_to_rownames(var = 'genome') %>% 
  t() %>% 
  as.data.frame()

dim(mag_weighted_day35)
dim(design_day35)
 
mean(rownames(design_day35) == rownames(mag_weighted_day35))

12.2 Prepate data for HMSC

12.2.1 Define tables

# Genome count table (quantitative community data)
YData <- log1p(mag_weighted_day35)

# Fixed effects data (explanatory variables)
XData <- design_day35
mean(rownames(YData) == rownames(XData))  # Ydata and XData in the same order

# Genome phylogeny
PData <- genome_tree

YData <- YData[,colnames(YData) %in% genome_tree$tip.label]  # Filter missing MAGs in tree

# Traits 
gifts_functions <- 
  gifts_elements %>%
  as.data.frame() %>% 
  rownames_to_column(var = 'genome') %>% 
  filter(genome %in% colnames(YData)) %>% 
  column_to_rownames(var = 'genome')

mean(gifts_functions$mag_id == colnames(YData))

TrData <- data.frame(MCI = TrData)
rownames(TrData) <- colnames(YData)

12.2.2 Define formulas

# Define TrFormula
TrFormula <- ~MCI

# Define XFormula
XFormula <- ~trial + age + chicken_body_weight + log_seq_depth

# Study Design
StudyDesign <- design[,c(2,9)]
rL.Pen <- HmscRandomLevel(units = levels(StudyDesign$pen))

12.2.3 Define HMSC model

m <- Hmsc(Y = YData,
                XData = XData,
                XFormula = XFormula,
                studyDesign = StudyDesign,
                ranLevels = list("pen" = rL.Pen),
                phyloTree = tree,
                TrData = TrData,
                TrFormula = TrFormula,
                distr = "normal",
                YScale = TRUE)

save(m, file = file.path(ModelDir,"unfitted_model.Rdata"))
rm(list = ls())

12.2.4 Define MCMC

# Chain characteristics
## How often to sample the MCM
samples_list = c(5, 250, 250)

# The number of MCMC steps between each recording sample
thin_list = c(1,1,10)

# The number of MCMC chains ot use
nChains = 4

# Load only the unfitted models
load(file.path(ModelDir,"unfitted_model.Rdata"))

unf_model <- m

12.3 Fit model

set.seed(1)

for (Lst in 1:length(samples_list)) {
  thin = thin_list[Lst]
  samples = samples_list[Lst]
  # fit the model
  m = sampleMcmc(unf_model,
                 samples = samples,
                 thin = thin,
                 adaptNf = rep(ceiling(0.4*samples*thin), unf_model$nr),
                 transient = ceiling(0.5*samples*thin),
                 nChains = nChains,
                 nParallel = nChains)
  # create file name
  filename = paste("bw_model",
                   "_thin_", as.character(thin),
                   "_samples_", as.character(samples),
                   "_chains_", as.character(nChains),
                   ".Rdata", sep = "")
  # save file
  save(model, file = file.path(ModelDir,filename)) 
}
rm(list = ls())

13 Bacteria vs Chicken BW HMSC analysis

13.1 Load model

13.1.1 Define directories

# Directory
localDir = "."
ModelDir = file.path(localDir, "hmsc_bw/models")
PanelDir = file.path(localDir, "hmsc_bw/panels")
MFDir <- file.path(localDir, "hmsc_bw/model_fit")

13.1.2 Load model

if(!file.exists("hmsc_bw/models/bw_model_thin_10_samples_250_chains_4.Rdata")){
  download.file(
    url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/bw_model_thin_10_samples_250_chains_4.Rdata',
    destfile = 'hmsc_bw/models/bw_model_thin_10_samples_250_chains_4.Rdata', method = 'curl')
}

load(file = file.path(ModelDir,"bw_model_thin_10_samples_250_chains_4.Rdata"))

13.2 Evaluate model

partition <- createPartition(m, nfolds = 4)
preds <- computePredictedValues(m, partition = partition, nChains = 1) # nParallel = nChains
MFCV <- evaluateModelFit(hM = m, predY = preds)

save(MFCV,file = file.path(MFDir, "hmsc_bw/model_fit/MF_thin_10_samples_250_chains_4.Rdata"))

13.3 Examine species responses

Plot not included in Supplementary

beta_post <- getPostEstimate(m, parName = "Beta")

plotBeta(m, beta_post,supportLevel = 0.9, plotTree = TRUE)

13.4 Examine variance partition

Plot not included in Supplementary

varpartition <- computeVariancePartitioning(m)

plotVariancePartitioning(m, VP = varpartition)

bw_var <- varpartition$vals[3,]
bw_param <- beta_post$mean[4,]
bw_support <- beta_post$support[4,]

13.5 Create a data table

load("hmsc_bw/model_fit/MF_thin_10_samples_250_chains_4.Rdata")

rel_abu <- colMeans(vegan::decostand(exp(m$Y), method = "total")) * 100

toplot <- data.frame(parameter = bw_param,
                     bw_support = bw_support,
                     var = bw_var,
                     pred = MFCV$R2,
                     var_pred = bw_var * MFCV$R2)

toplot$MAG <- rownames(toplot)
toplot$rel_abu <- rel_abu
rownames(toplot) <- NULL

write.table(toplot, file = "data/data.txt", row.names = F)

13.5.1 Associate bacteria response with chicken BW

not included in Supplementary

toplot <- read.table("data/data.txt", , row.names = F)

ggplot() +
  geom_point(data = toplot, aes(x = parameter, y = Pred), alpha = 0.7, shape = 16) +
  geom_point(data = toplot[toplot$bw_support > 0.975,],
             mapping = aes(x = parameter, y = Pred),
             color = "red", shape = 16) +
  geom_point(data = toplot[toplot$bw_support < 0.025,],
             mapping = aes(x = parameter, y = Pred),
             color = "blue", shape = 16) +
  geom_vline(xintercept = 0) +
  geom_hline(yintercept = mean(toplot$Pred, na.rm = TRUE), linetype = "dashed") +
  labs(x = "Parameter estimate", y = "Body weight's importance") +
  theme_minimal()

13.6 Create table with HMSC results

# Total relative abundance of species associated positively and negatively with body weight (~9%)
sum(toplot$rel_abu[(toplot$bw_support > 0.975 | toplot$bw_support < 0.025) & (toplot$var_pred > mean(toplot$var_pred))])

# Total relative abundance of species associated positively with body weight (<1%)
sum(toplot$rel_abu[(toplot$bw_support > 0.975) & (toplot$var_pred > mean(toplot$var_pred))])

# Total relative abundance of species associated negatively with body weight (>8%)
sum(toplot$rel_abu[(toplot$bw_support < 0.025 | toplot$bw_support < 0.025) & (toplot$var_pred > mean(toplot$var_pred))])

# Phylogenetic signal
mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))

bw_parameter <- beta_post$mean[4,]

bw_parameter_phyloSorted <-
  data.frame(bw_parameter = bw_parameter[
    match(m$phyloTree$tip.label,
          names(bw_parameter))])

mean(rownames(bw_parameter_phyloSorted) == m$phyloTree$tip.label)

new_tree <- m$phyloTree
new_tree$node.label <- NULL

obj <- phylo4d(new_tree, tip.data = bw_parameter_phyloSorted)
barplot.phylo4d(obj)

bw.cg <- phyloCorrelogram(obj, trait = "bw_parameter")
saveRDS(bw.cg,file = file.path(PanelDir,"bw.cg.rds"))
bw.cg <- readRDS(file = file.path(PanelDir,"bw.cg.rds"))

plot(bw.cg)

bw_parameter_df <- data.frame(genome = names(bw_parameter), bw_response = bw_parameter)
bw_parameter_df$bw_trend <- NA
bw_parameter_df$bw_trend[beta_post$support[4,] > 0.95] <- "positive"
bw_parameter_df$bw_trend[beta_post$support[4,] < 0.05] <- "negative"
bw_parameter_df$bw_trend[is.na(bw_parameter_df$bw_trend)] <- "neutral"

write_tsv(bw_parameter_df,file = "data/hmsc_bw_trend.tsv")
rm(list = ls())

14 Comparing metabolic capacity of positively and negatively associated bacteria

14.1 Load MG data

load("data/data_mg.Rdata")

trends_bw <-
  read_tsv("data/hmsc_bw_trend.tsv") %>% 
  mutate(trend_07 = case_when(bw_support > 0.85 ~ 'positive',
                           bw_support < 0.15 ~ 'negative',
                           TRUE ~ 'non-significant')) %>% 
  mutate(trend_09 = case_when(bw_support > 0.95 ~ 'positive',
                           bw_support < 0.05 ~ 'negative',
                           TRUE ~ 'non-significant'))

gifts <-
  gifts_elements %>%
  as.data.frame() %>% 
  select(where(~ sum(.) > 0))

gifts_funct <-
  gifts_functions %>%
  as.data.frame() %>% 
  select(where(~ sum(.) > 0))

avg_mci <- rowMeans(gifts)

gifts_funct$avg_mci <- avg_mci

avg_mci_bio <- rowMeans(gifts[,grepl("B",names(gifts))])
gifts_funct$avg_mci_bio <- avg_mci_bio

avg_mci_deg <- rowMeans(gifts[,grepl("D",names(gifts))])
gifts_funct$avg_mci_deg <- avg_mci_deg


gift_colors <- 
  read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend = str_c(Code_function, " - ", Function))

14.2 Taxonomy of positively and negatively associated MAGs

14.2.1 Positive group

All MAGs with positive association belong to RF39

trends_bw %>%
  filter(trend_09 == "positive") %>%
  select(genome) %>% 
  left_join(genome_taxonomy, by = 'genome') %>% 
  select(order) %>% 
  table()
order
RF39 
  10 

14.2.2 Negative group

MAGs with negative association belong to various orders, mainly to Lachnospirales and Oscillospirales

trends_bw %>%
  filter(trend_09 == "negative") %>%
  select(genome) %>%
  left_join(genome_taxonomy, by = 'genome') %>% 
  select(order) %>% 
  table() %>% 
  sort()
order
   Actinomycetales  Campylobacterales     Monoglobales_A         Opitutales               RF39  Saccharimonadales               RF32             TANB77 
                 1                  1                  1                  1                  1                  1                  4                  4 
  Coriobacteriales      Bacteroidales Erysipelotrichales    Lactobacillales Christensenellales    Oscillospirales     Lachnospirales 
                 5                  8                  8                 12                 27                 35                 81 

14.3 Compute MCIs of BW positive and negative associated communities

gifts_funct_bw_pos <-
  gifts_funct %>%
  mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
         avg_mci_deg = gifts_funct$avg_mci_deg,
         bw_effect = trends_bw$trend_09) %>%
  filter(bw_effect == "positive") %>%
  select(-bw_effect)

gifts_funct_bw_neu <- 
  gifts_funct %>%
  mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
         avg_mci_deg = gifts_funct$avg_mci_deg, 
         bw_effect = trends_bw$trend_09) %>%
  filter(bw_effect == "non-significant") %>%
  select(-bw_effect)

gifts_funct_bw_neg <- 
  gifts_funct %>%
  mutate(avg_mci_bio = gifts_funct$avg_mci_bio,
         avg_mci_deg = gifts_funct$avg_mci_deg, 
         bw_effect = trends_bw$trend_09) %>%
  filter(bw_effect == "negative") %>%
  select(-bw_effect)
mean_func <- function(data, indices) {
  return(mean(data[indices]))
}

boot_pos_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_pos), ncol = 4))
colnames(boot_pos_df) <- c("function_id", "mean", "ci_05", "ci_95")

boot_neu_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_neu), ncol = 4))
colnames(boot_neu_df) <- c("function_id", "mean", "ci_05", "ci_95")

boot_neg_df <- data.frame(matrix(nrow = ncol(gifts_funct_bw_neg), ncol = 4))
colnames(boot_neg_df) <- c("function_id", "mean", "ci_05", "ci_95")

for(i in 1:ncol(gifts_funct_bw_pos)){
    boot_pos_df[i, "function_id"] <- colnames(gifts_funct_bw_pos)[i]
    
    if(sum(gifts_funct_bw_pos[, i]) == 0){
      boot_pos_df[i, "mean"] <- 0
      boot_pos_df[i, "ci_05"] <- 0
      boot_pos_df[i, "ci_95"] <- 0
      }
    else{
      boot_temp_pos <- boot(gifts_funct_bw_pos[, i], statistic = mean_func, R = 10000)
      boot_temp_pos_ci <- boot.ci(boot_temp_pos, type = "bca")
      boot_pos_df[i, "mean"] <- boot_temp_pos_ci$t0
      boot_pos_df[i, "ci_05"] <- boot_temp_pos_ci$bca[4]
      boot_pos_df[i, "ci_95"] <- boot_temp_pos_ci$bca[5]
      }
    boot_neu_df[i, "function_id"] <- colnames(gifts_funct_bw_neu)[i]
    boot_temp_neu <- boot(gifts_funct_bw_neu[, i], statistic = mean_func, R = 10000)
    boot_temp_neu_ci <- boot.ci(boot_temp_neu, type = "bca")
    boot_neu_df[i, "mean"] <- boot_temp_neu_ci$t0
    boot_neu_df[i, "ci_05"] <- boot_temp_neu_ci$bca[4]
    boot_neu_df[i, "ci_95"] <- boot_temp_neu_ci$bca[5]
    
    boot_neg_df[i, "function_id"] <- colnames(gifts_funct_bw_neg)[i]
    boot_temp_neg <- boot(gifts_funct_bw_neg[, i], statistic = mean_func, R = 10000)
    boot_temp_neg_ci <- boot.ci(boot_temp_neg, type = "bca")
    boot_neg_df[i, "mean"] <- boot_temp_neg_ci$t0
    boot_neg_df[i, "ci_05"] <- boot_temp_neg_ci$bca[4]
    boot_neg_df[i, "ci_95"] <- boot_temp_neg_ci$bca[5]
    }

boot_df <- data.frame(rbind(boot_pos_df %>% mutate(bw_association = "postive"),
                            boot_neu_df %>% mutate(bw_association = "non-significant"),
                            boot_neg_df %>% mutate(bw_association = "negative")))  

gifts_funct_df <- data.frame(rbind(gifts_funct_bw_pos %>% mutate(bw_association = "postive"),
                                   gifts_funct_bw_neu %>% mutate(bw_association = "non-significant"),
                                   gifts_funct_bw_neg %>% mutate(bw_association = "negative")))

14.3.1 MCI comparisons of biosynthesis pathways

## B01
p_B01_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B01, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B01, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B01"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## B02
p_B02_mg <- 
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B02, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B02, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B02"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14)) 

## B03
p_B03_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B03, color = bw_association),
              alpha = 0.5, width = 0.25) + 
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B03, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data=boot_df %>% filter(function_id=="B03"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association,color=bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35))+
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal()+
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))  

## B04
p_B04_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B04, color = bw_association),
              alpha = 0.5, width = 0.25)+
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B04, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B04"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))  

## B06
p_B06_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B06, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B06, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B06"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## B07
p_B07_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B07, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B07, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B07"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## B08
p_B08_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B08, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B08, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B08"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## B09
p_B09_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B09, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B09, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B09"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## B10
p_B10_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = B10, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = B10, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "B10"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## Avg. MCI Biosynthesis
p_Mci_bio_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_bio, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_bio, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "avg_mci_bio"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") + 
  ylab("Avg. bio. MCI") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## Avg. MCI
p_Mci_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "avg_mci"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  ylab("Avg. MCI") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

14.3.2 The significant ones

grid.arrange(p_Mci_mg, p_B04_mg, p_B08_mg, ncol = 3)

14.3.3 All

sup_s4 <- grid.arrange(p_Mci_bio_mg, p_B01_mg, p_B02_mg,
             p_B03_mg, p_B04_mg, p_B06_mg, 
             p_B07_mg, p_B08_mg, p_B09_mg,
             p_B10_mg, ncol = 3)

ggsave("sup_s4.pdf", plot = sup_s4, width = 20, height = 22, units = "cm", dpi = 600)

14.3.3.1 MCI comparisons of degradation pathways

## D01
p_D01_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D01, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D01, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D01"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) + 
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## D02
p_D02_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D02, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D02, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D02"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14)) 

## D03
p_D03_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D03, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D03, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D03"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size =14))  

## D05
p_D05_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D05, color = bw_association),
              alpha = 0.5,width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D05, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D05"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## D06
p_D06_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D06, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D06, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id  == "D06"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## D07
p_D07_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D07, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D07, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D07"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## D08
p_D08_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D08, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D08, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D08"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## D09
p_D09_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = D09, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = D09, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "D09"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

## Avg. MCI Degradation
p_Mci_deg_mg <-
  ggplot() +
  geom_jitter(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_deg, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(data = gifts_funct_df, aes(x = bw_association, y = avg_mci_deg, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_df %>% filter(function_id == "avg_mci_deg"),
                 aes(ymin = ci_05, ymax = ci_95, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","grey50","blue3")) +
  scale_fill_manual(values = c("red3","grey50","blue3")) +
  xlab("Body-weight association") +
  ylab("Avg. deg. MCI") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 14),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 14))

14.3.4 All

grid.arrange(p_Mci_deg_mg, p_D01_mg, p_D02_mg,
             p_D03_mg, p_D05_mg,
             p_D06_mg, p_D07_mg, p_D08_mg,
             p_D09_mg, ncol = 3)

14.4 Heatmap of MCI at element level

Not included in Supplementary.

gifts_bw_pos <-
  gifts %>%
  mutate(bw_effect = trends_bw$trend_09) %>%
  filter(bw_effect == "positive") %>%
  select(-bw_effect)

gifts_bw_neu <-
  gifts %>%
  mutate(bw_effect = trends_bw$trend_09) %>%
  filter(bw_effect == "non-significant") %>%
  select(-bw_effect)

n_neg_sp <- 50

gifts_bw_neg <-
  gifts %>%
  mutate(bw_effect = trends_bw$trend_09, bw_param = trends_bw$parameter) %>%
  filter(bw_effect == "negative") %>%
  arrange(bw_param) %>%
  slice(1:n_neg_sp) %>%
  select(-c(bw_effect, bw_param))

gifts_bw_df_long <- 
  data.frame(rbind(gifts_bw_pos, gifts_bw_neg)) %>%
  mutate(bw_association = c(rep("positive", nrow(gifts_bw_pos)), rep("negative", nrow(gifts_bw_neg)))) %>%
  select(-matches("S0")) %>%
  rownames_to_column(var = "genome") %>%
  pivot_longer(!c(genome, bw_association), names_to = "Element", values_to = "mci") %>%
  mutate(Function = substr(Element, start = 1, stop = 3)) %>%
  relocate(Function, .after = Element)

# Heatmap
gifts_bw_df_long %>%
  rename(Code_element = "Element") %>% 
  rename(Code_function = "Function") %>% 
  left_join(gift_colors, by = "Code_function") %>% 
  group_by(Code_element) %>%
  filter(sum(mci) > 0) %>%
  ungroup() %>%
  ggplot(., aes(x = genome, y = Code_element, fill = bw_association, group = legend, alpha = mci)) +
    geom_tile(color = "#ffffff") +
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
    scale_fill_manual(values = c("red3",
                                 "blue3"),
                      na.value = "#f4f4f4") +
    facet_grid(legend ~ bw_association, scales = "free", space = "free") +
    theme_void(base_size = 8) +
    theme(axis.text.y = element_blank(),
          strip.text.y = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size =  6),
          legend.position = "none")

rm(list = ls())

15 Comparing activity of positively and negatively associated bacteria

15.1 Load data

load("data/data_mt.Rdata")
load("data/data_mg.Rdata")

trends_bw <-
  read_tsv("data/hmsc_bw_trend.tsv") %>% 
  mutate(trend_07 = case_when(bw_support > 0.85 ~ 'positive',
                           bw_support < 0.15 ~ 'negative',
                           TRUE ~ 'non-significant')) %>% 
  mutate(trend_09 = case_when(bw_support > 0.95 ~ 'positive',
                           bw_support < 0.05 ~ 'negative',
                           TRUE ~ 'non-significant'))


gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend = str_c(Code_function, " - ", Function))

15.2 Expression profiles of positively and negatively associated bacteria

The resulting plot corresponds to Figure 3c in the manuscript.

15.2.1 Element-based expression table of day 35 individuals

mag_elements <- lapply(expr_counts, function(x) to.elements(x, GIFT_db2))

# Convert correct tibble for applying CLR conversion
mag_elements_merged <- 
  mag_elements %>%
  lapply(function(x) t(x)) %>%
  lapply(function(x) as.data.frame(x)) %>%
  Map(cbind, ., MAG = names(.)) %>%
  lapply(function(x) rownames_to_column(x, "Element")) %>% 
  do.call(rbind, .) %>%
  mutate(Function = substr(Element, start = 1, stop = 3))%>%
  as.data.frame() %>% 
  relocate(MAG, .before = Element) %>%
  relocate(Function, .after = Element)

# Select individuals from day 35
metadata_day_35 <- 
  chicken_metadata_mt %>% 
  filter(sampling_time == "35")

mag_elements_day_35 <- 
  mag_elements_merged %>%
  select(MAG, Element, Function, metadata_day_35$animal_code)

metadata_mt <- 
  metadata_day_35 %>%
  arrange(match(metadata_day_35$animal_code, colnames(mag_elements_day_35[,-c(1,3)])))

mean(metadata_mt$animal_code == colnames(mag_elements_day_35[,-c(1:3)]))
[1] 1

15.2.2 Normalised expression table

normalisation_factor <- 
  expr_counts %>% # Normalisation is performed with dist_expr because it contains the transcripts for all the functions, not only the functions considered important for the host in distillR.
  lapply(function(x) rowSums(x)) %>%
  reduce(cbind) %>%
  rowSums()

normalisation_factor <- normalisation_factor[match(metadata_mt$animal_code,names(normalisation_factor))]

mean(names(normalisation_factor) == metadata_mt$animal_code)
[1] 1
# Normalised relative expression
norm_elements_day_35 <- sweep(mag_elements_day_35[,-c(1:3)], 2, normalisation_factor, FUN = "/")
norm_elements_day_35 <- data.frame(mag_elements_day_35[,c(1:3)], norm_elements_day_35)

15.2.3 Filter MAGs associated with chicken body weight

positive_mags <- 
  trends_bw %>%
  filter(trend_09 == "positive") %>%
  select(genome) %>%
  unlist() %>%
  as.character()

negative_mags <- 
  trends_bw %>%
  filter(trend_09 == "negative") %>%
  select(genome) %>%
  unlist() %>%
  as.character()

n_neg_sp <- 50 # Filter 50 negative MAGs for the figure
negative_mags_top50 <- 
  trends_bw %>%
  filter(trend_09 == "negative") %>%
  arrange(parameter) %>%
  slice(1:n_neg_sp) %>%
  select(genome) %>%
  unlist() %>%
  as.character()

mag_subset_elements <-
  norm_elements_day_35 %>%
  filter(MAG %in% c(positive_mags, negative_mags))

mag_subset_element_top50s <-
  norm_elements_day_35 %>%
  filter(MAG %in% c(positive_mags, negative_mags_top50))

15.2.4 Tidy data

# Filter relative abundance table
mag_weighted_day35 <- 
  mag_weighted %>%
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column(var = 'animal_code') %>% 
  filter(animal_code %in% colnames(mag_subset_elements[, -c(1:3)])) %>%
  column_to_rownames(var = "animal_code")
  
total_day35 <- 
  t(decostand(mag_weighted_day35, "total")) %>%
  as.data.frame() %>%
  rownames_to_column(var = "genome") %>%
  filter(genome %in% c(positive_mags, negative_mags)) %>%
  column_to_rownames(var = "genome") %>%
  mutate(mean_abundance = rowMeans(.)) %>%
  rownames_to_column(var = "genome") %>%
  select(genome, mean_abundance)

mag_subset_elements_2 <-
  mag_subset_element_top50s %>%
  rename(genome = 'MAG') %>% 
  left_join(total_day35, by = "genome") %>%
  mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
  select(-mean_abundance) %>%
  mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
  mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
  filter(!grepl("S", Function)) %>%
  group_by(Element) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup() %>%
  group_by(genome) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup()

mag_subset_elements_3 <-
  mag_subset_elements %>%
  rename(genome = 'MAG') %>% 
  left_join(total_day35, by = "genome") %>%
  mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
  select(-mean_abundance) %>%
  mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
  mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
  filter(!grepl("S", Function)) %>%
  group_by(Element) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup() %>%
  group_by(genome) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup()

15.2.5 Plot

mag_subset_elements_2 %>%
  rename(Code_function = "Function") %>% 
  left_join(gift_colors, by = "Code_function") %>%
  ggplot(aes(x = genome, y = Element, fill = bw_association, group = legend, alpha = log(avg_expr_million, 2))) +
  geom_tile(color = "#ffffff") +
  scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_fill_manual(values = c("red3",
                               "blue3"),
                    na.value = "#f4f4f4") +
  facet_grid(legend ~ bw_association, scales = "free", space = "free") +
  theme_void(base_size = 8) +
  theme(axis.text.x = element_text(size = 5, angle = 90),
        axis.text.y = element_text(size = 5),
        strip.text.y = element_blank(),
        legend.position = "none")

15.3 Taxonomy of reduced MAGs

reduced_mags1 <-
  mag_subset_elements_3 %>%
  filter(Element == "B0101") %>%
  filter(avg_expr_million == 0) %>%
  select(genome) %>%
  unlist() %>%
  as.character()

reduced_mags2 <- 
  mag_subset_elements_3 %>%
  filter(Element == "B0102") %>%
  filter(avg_expr_million == 0) %>%
  select(genome) %>%
  unlist() %>%
  as.character()

reduced_mags3 <-
  mag_subset_elements_3 %>%
  filter(Element == "B0103") %>%
  filter(avg_expr_million == 0) %>%
  select(genome) %>%
  unlist() %>%
  as.character()

reduced_mags <- intersect(intersect(reduced_mags1, reduced_mags2), reduced_mags3)

reduced_mags_df <-
  mag_subset_elements_3 %>%
  filter(genome %in% reduced_mags) %>%
  group_by(Element) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup() %>%
  group_by(genome) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup()%>%
  pivot_wider(id_cols = c(genome, bw_association), names_from = Element, values_from = avg_expr_million)

All reduced MAGs with positive association belong to RF39

positives <-
  reduced_mags_df %>%
  filter(bw_association == "positive") %>%
  select(genome) %>%
  unlist() %>%
  as.character()

genome_taxonomy[genome_taxonomy$genome %in% positives,] %>%
  select(order) %>%
  table()
order
RF39 
  10 
genome_taxonomy[genome_taxonomy$genome %in% positives,] %>%
  select(family) %>%
  table()
family
UBA660 
    10 

Reduced MAGs with negative association belong to various orders, mainly to Christensenellales

negatives <-
  reduced_mags_df %>%
  filter(bw_association == "negative") %>%
  select(genome) %>%
  unlist() %>%
  as.character()

genome_taxonomy[genome_taxonomy$genome %in% negatives,] %>%
  select(order) %>%
  table()
order
     Bacteroidales Christensenellales   Coriobacteriales     Lachnospirales     Monoglobales_A    Oscillospirales  Saccharimonadales 
                 1                 12                  1                  1                  1                  1                  1 
genome_taxonomy[genome_taxonomy$genome %in% negatives,] %>%
  select(family) %>%
  table()
family
Acutalibacteraceae       Atopobiaceae     Borkfalkiaceae    Lachnospiraceae      Rikenellaceae Saccharimonadaceae            UBA1242            UBA1381 
                 1                  1                  1                  1                  1                  1                 11                  1 

15.4 Genomic characteristics of genome reduced MAGs

The resulting plot corresponds to Figure 4a.

gifts <-
  gifts_elements %>%
  as.data.frame() %>% 
  select(where(~ sum(.) > 0))

mci_df <-
  gifts %>%
  filter(rownames(.) %in% reduced_mags_df$genome) %>%
  mutate(avg_mci = rowMeans(.)) %>%
  rownames_to_column(var = "genome") %>%
  left_join(reduced_mags_df, by = "genome") %>%
  left_join(genome_taxonomy, by = "genome") %>%
  filter(family=="UBA1242"| order=="RF39") %>%
  select(genome, avg_mci, bw_association)

length_df <- 
  genome_stats %>%
  filter(genome %in% reduced_mags_df$genome) %>%
  left_join(mci_df, by = "genome") %>%
  left_join(genome_taxonomy, by = "genome") %>%
  filter(family=="UBA1242"|order=="RF39") %>%
  select(genome, mag_length, bw_association)


mean_func <- function(data, indices) {
  return(mean(data[indices]))
}

boot_mci_df <- data.frame(matrix(nrow = 2, ncol = 4))

colnames(boot_mci_df) <-c ("bw_association","mean","ci_025","ci_975")
boot_mci_df$bw_association <- c("positive","negative")

bw_pos_redMAG_mci_boot <- boot(mci_df$avg_mci[mci_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_redMAG_mci_boot_ci <- boot.ci(bw_pos_redMAG_mci_boot, type = "bca")

bw_neg_redMAG_mci_boot<-boot(mci_df$avg_mci[mci_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_redMAG_mci_boot_ci<-boot.ci(bw_neg_redMAG_mci_boot, type = "bca")

boot_mci_df[1,"mean"] <- bw_pos_redMAG_mci_boot$t0
boot_mci_df[1,"ci_025"] <- bw_pos_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[1,"ci_975"] <- bw_pos_redMAG_mci_boot_ci$bca[,5]

boot_mci_df[2,"mean"] <- bw_neg_redMAG_mci_boot$t0
boot_mci_df[2,"ci_025"] <- bw_neg_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[2,"ci_975"] <- bw_neg_redMAG_mci_boot_ci$bca[,5]
mci_df %>%
  ggplot()+
  geom_jitter(aes(x = bw_association, y = avg_mci, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(aes(x = bw_association, y = avg_mci, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_mci_df,
                 aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","blue3")) +
  scale_fill_manual(values = c("red3","blue3")) +
  ylab("Avg. MCI")+
  xlab("Body-weight association")+
  theme_minimal()+
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 12),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 12)) 

15.4.1 Genome length of genome reduced MAGss

The resulting plot corresponds to Figure 4b.

boot_length_df <- data.frame(matrix(nrow = 2, ncol = 4))

colnames(boot_length_df) <- c("bw_association", "mean", "ci_025", "ci_975")
boot_length_df$bw_association <- c("positive", "negative")

bw_pos_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_red_mag_length_boot_ci <- boot.ci(bw_pos_red_mag_length_boot, type = "bca")

bw_neg_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_red_mag_length_boot_ci <- boot.ci(bw_neg_red_mag_length_boot, type = "bca")

boot_length_df[1, "mean"] <- bw_pos_red_mag_length_boot$t0
boot_length_df[1, "ci_025"] <- bw_pos_red_mag_length_boot_ci$bca[,4]
boot_length_df[1, "ci_975"] <- bw_pos_red_mag_length_boot_ci$bca[,5]

boot_length_df[2, "mean"] <- bw_neg_red_mag_length_boot$t0
boot_length_df[2, "ci_025"] <- bw_neg_red_mag_length_boot_ci$bca[,4]
boot_length_df[2, "ci_975"] <- bw_neg_red_mag_length_boot_ci$bca[,5]
length_df %>%
  ggplot() +
  geom_jitter(aes(x = bw_association, y = mag_length, color = bw_association),
              alpha = 0.5, width = 0.25) +
  geom_boxplot(aes(x = bw_association, y = mag_length, fill = bw_association),
               alpha = 0.1, outlier.shape = NA, width = 0.5) +
  geom_linerange(data = boot_length_df,
                 aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
                 linewidth = 4, size = 1,position = position_nudge(x = 0.35)) +
  scale_color_manual(values = c("red3","blue3")) +
  scale_fill_manual(values = c("red3","blue3")) +
  ylab("MAG length") +
  xlab("Body-weight association") +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 12),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 12)) 

15.4.2 Expression profiles of genome reduced MAGs

The resulting plot corresponds to Figure 4c.

comp_reduced_mags <- 
  reduced_mags_df %>% 
  left_join(genome_taxonomy, by = 'genome') %>%
  filter(family=="UBA1242"|order=="RF39") %>%
  select(-domain:-species)

reduced_mags_dist <- vegdist(sqrt(comp_reduced_mags[, -c(1:3)]), method = "bray")
pcoa_reduced_mags <- cmdscale(reduced_mags_dist, eig = TRUE, k = 3, add = TRUE)

pcoa_reduced_mags_df <-
  pcoa_reduced_mags$points %>%
  as.data.frame() %>%
  mutate(genome = comp_reduced_mags$genome) %>%
  mutate(bw_association = comp_reduced_mags$bw_association) %>%
  rename(PCOA1 = 'V1', PCOA2 = 'V2', PCOA3 = 'V3')

envfit_gifts <- envfit(pcoa_reduced_mags_df[, 1:2], sqrt(comp_reduced_mags[, -c(1,2)]), permutations = 999)

envfit_gifts_df <-
  as.data.frame(scores(envfit_gifts, display = "vectors")) %>%
  rownames_to_column(.,var = "gift_id") %>%
  mutate(pval = envfit_gifts$vectors$pvals) %>%
  filter(pval < 0.05) %>%
  select(gift_id, PCOA1, PCOA2)
ggplot() +
  geom_point(data = pcoa_reduced_mags_df, 
             aes(x = PCOA1, y = PCOA2, color = bw_association), 
             size = 2, 
             shape = 16, 
             alpha = 0.8) +
  scale_color_manual(values = c("red3","blue3")) +
  geom_segment(data = envfit_gifts_df, 
               aes(x = 0, xend = PCOA1*0.5, y = 0 , yend = PCOA2*0.5),
               arrow = arrow(length = unit(0.25, "cm")),
               color="gray20",
               linewidth = 0.2) +
  geom_label_repel(data = envfit_gifts_df, aes(x = PCOA1*0.5, y = PCOA2*0.5, label = gift_id), size = 1, max.overlaps = 10) +
  theme_minimal() +
  theme(legend.position = 'none',
        axis.title.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))

15.4.3 Activity heatmap of genome reduced bacteria

mag_subset_elements_3 %>%
  rename(Code_element = "Element") %>% 
  rename(Code_function = "Function") %>% 
  left_join(GIFT_db2 %>%  select (Code_element, Code_function, Element, Function)) %>%
  left_join(gift_colors, by = "Code_function") %>% 
  left_join(genome_taxonomy, by = 'genome') %>%
  filter(family == "UBA1242" | family == "UBA660") %>%
  select(-domain:-species) %>% 
  group_by(Element) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup() %>%
  group_by(genome) %>%
  filter(sum(avg_expr_million) > 0) %>%
  ungroup() %>%
  ggplot(aes(x = genome,
             y = Code_element,
             fill = bw_association,
             group = legend,
             alpha = log(avg_expr_million, 2))) +
  geom_tile(color = "#ffffff") +
  scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
  scale_fill_manual(values = c("red3",
                               "blue3"),
                    na.value = "#f4f4f4") +
  facet_grid(legend ~ bw_association, scales = "free", space = "free") +
  theme_void(base_size = 8) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1),
        axis.text.y = element_text(size = 5, vjust = 0, hjust = 1),
        strip.text.y = element_blank(),
        legend.position = "none")

rm(list = ls())

  1. University of Copenhagen, ↩︎